123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746 |
- //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This pass performs various transformations related to eliminating memcpy
- // calls, or transforming sets of stores into memset's.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
- #include "llvm/ADT/DenseSet.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/ADT/iterator_range.h"
- #include "llvm/Analysis/AliasAnalysis.h"
- #include "llvm/Analysis/AssumptionCache.h"
- #include "llvm/Analysis/CaptureTracking.h"
- #include "llvm/Analysis/GlobalsModRef.h"
- #include "llvm/Analysis/Loads.h"
- #include "llvm/Analysis/MemoryLocation.h"
- #include "llvm/Analysis/MemorySSA.h"
- #include "llvm/Analysis/MemorySSAUpdater.h"
- #include "llvm/Analysis/TargetLibraryInfo.h"
- #include "llvm/Analysis/ValueTracking.h"
- #include "llvm/IR/BasicBlock.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Dominators.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/GlobalVariable.h"
- #include "llvm/IR/IRBuilder.h"
- #include "llvm/IR/InstrTypes.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/IntrinsicInst.h"
- #include "llvm/IR/Intrinsics.h"
- #include "llvm/IR/LLVMContext.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/PassManager.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/User.h"
- #include "llvm/IR/Value.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/MathExtras.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Transforms/Scalar.h"
- #include "llvm/Transforms/Utils/Local.h"
- #include <algorithm>
- #include <cassert>
- #include <cstdint>
- #include <optional>
- using namespace llvm;
- #define DEBUG_TYPE "memcpyopt"
- static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
- "enable-memcpyopt-without-libcalls", cl::Hidden,
- cl::desc("Enable memcpyopt even when libcalls are disabled"));
- STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
- STATISTIC(NumMemSetInfer, "Number of memsets inferred");
- STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
- STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
- STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
- namespace {
- /// Represents a range of memset'd bytes with the ByteVal value.
- /// This allows us to analyze stores like:
- /// store 0 -> P+1
- /// store 0 -> P+0
- /// store 0 -> P+3
- /// store 0 -> P+2
- /// which sometimes happens with stores to arrays of structs etc. When we see
- /// the first store, we make a range [1, 2). The second store extends the range
- /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
- /// two ranges into [0, 3) which is memset'able.
- struct MemsetRange {
- // Start/End - A semi range that describes the span that this range covers.
- // The range is closed at the start and open at the end: [Start, End).
- int64_t Start, End;
- /// StartPtr - The getelementptr instruction that points to the start of the
- /// range.
- Value *StartPtr;
- /// Alignment - The known alignment of the first store.
- MaybeAlign Alignment;
- /// TheStores - The actual stores that make up this range.
- SmallVector<Instruction*, 16> TheStores;
- bool isProfitableToUseMemset(const DataLayout &DL) const;
- };
- } // end anonymous namespace
- bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
- // If we found more than 4 stores to merge or 16 bytes, use memset.
- if (TheStores.size() >= 4 || End-Start >= 16) return true;
- // If there is nothing to merge, don't do anything.
- if (TheStores.size() < 2) return false;
- // If any of the stores are a memset, then it is always good to extend the
- // memset.
- for (Instruction *SI : TheStores)
- if (!isa<StoreInst>(SI))
- return true;
- // Assume that the code generator is capable of merging pairs of stores
- // together if it wants to.
- if (TheStores.size() == 2) return false;
- // If we have fewer than 8 stores, it can still be worthwhile to do this.
- // For example, merging 4 i8 stores into an i32 store is useful almost always.
- // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
- // memset will be split into 2 32-bit stores anyway) and doing so can
- // pessimize the llvm optimizer.
- //
- // Since we don't have perfect knowledge here, make some assumptions: assume
- // the maximum GPR width is the same size as the largest legal integer
- // size. If so, check to see whether we will end up actually reducing the
- // number of stores used.
- unsigned Bytes = unsigned(End-Start);
- unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
- if (MaxIntSize == 0)
- MaxIntSize = 1;
- unsigned NumPointerStores = Bytes / MaxIntSize;
- // Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes % MaxIntSize;
- // If we will reduce the # stores (according to this heuristic), do the
- // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
- // etc.
- return TheStores.size() > NumPointerStores+NumByteStores;
- }
- namespace {
- class MemsetRanges {
- using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
- /// A sorted list of the memset ranges.
- SmallVector<MemsetRange, 8> Ranges;
- const DataLayout &DL;
- public:
- MemsetRanges(const DataLayout &DL) : DL(DL) {}
- using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
- const_iterator begin() const { return Ranges.begin(); }
- const_iterator end() const { return Ranges.end(); }
- bool empty() const { return Ranges.empty(); }
- void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
- if (auto *SI = dyn_cast<StoreInst>(Inst))
- addStore(OffsetFromFirst, SI);
- else
- addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
- }
- void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
- assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
- addRange(OffsetFromFirst, StoreSize.getFixedValue(),
- SI->getPointerOperand(), SI->getAlign(), SI);
- }
- void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
- int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
- }
- void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
- Instruction *Inst);
- };
- } // end anonymous namespace
- /// Add a new store to the MemsetRanges data structure. This adds a
- /// new range for the specified store at the specified offset, merging into
- /// existing ranges as appropriate.
- void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
- MaybeAlign Alignment, Instruction *Inst) {
- int64_t End = Start+Size;
- range_iterator I = partition_point(
- Ranges, [=](const MemsetRange &O) { return O.End < Start; });
- // We now know that I == E, in which case we didn't find anything to merge
- // with, or that Start <= I->End. If End < I->Start or I == E, then we need
- // to insert a new range. Handle this now.
- if (I == Ranges.end() || End < I->Start) {
- MemsetRange &R = *Ranges.insert(I, MemsetRange());
- R.Start = Start;
- R.End = End;
- R.StartPtr = Ptr;
- R.Alignment = Alignment;
- R.TheStores.push_back(Inst);
- return;
- }
- // This store overlaps with I, add it.
- I->TheStores.push_back(Inst);
- // At this point, we may have an interval that completely contains our store.
- // If so, just add it to the interval and return.
- if (I->Start <= Start && I->End >= End)
- return;
- // Now we know that Start <= I->End and End >= I->Start so the range overlaps
- // but is not entirely contained within the range.
- // See if the range extends the start of the range. In this case, it couldn't
- // possibly cause it to join the prior range, because otherwise we would have
- // stopped on *it*.
- if (Start < I->Start) {
- I->Start = Start;
- I->StartPtr = Ptr;
- I->Alignment = Alignment;
- }
- // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
- // is in or right at the end of I), and that End >= I->Start. Extend I out to
- // End.
- if (End > I->End) {
- I->End = End;
- range_iterator NextI = I;
- while (++NextI != Ranges.end() && End >= NextI->Start) {
- // Merge the range in.
- I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
- if (NextI->End > I->End)
- I->End = NextI->End;
- Ranges.erase(NextI);
- NextI = I;
- }
- }
- }
- //===----------------------------------------------------------------------===//
- // MemCpyOptLegacyPass Pass
- //===----------------------------------------------------------------------===//
- namespace {
- class MemCpyOptLegacyPass : public FunctionPass {
- MemCpyOptPass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
- MemCpyOptLegacyPass() : FunctionPass(ID) {
- initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- private:
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
- };
- } // end anonymous namespace
- char MemCpyOptLegacyPass::ID = 0;
- /// The public interface to this file...
- FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
- INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
- false, false)
- INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
- INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
- INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
- false, false)
- // Check that V is either not accessible by the caller, or unwinding cannot
- // occur between Start and End.
- static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
- Instruction *End) {
- assert(Start->getParent() == End->getParent() && "Must be in same block");
- // Function can't unwind, so it also can't be visible through unwinding.
- if (Start->getFunction()->doesNotThrow())
- return false;
- // Object is not visible on unwind.
- // TODO: Support RequiresNoCaptureBeforeUnwind case.
- bool RequiresNoCaptureBeforeUnwind;
- if (isNotVisibleOnUnwind(getUnderlyingObject(V),
- RequiresNoCaptureBeforeUnwind) &&
- !RequiresNoCaptureBeforeUnwind)
- return false;
- // Check whether there are any unwinding instructions in the range.
- return any_of(make_range(Start->getIterator(), End->getIterator()),
- [](const Instruction &I) { return I.mayThrow(); });
- }
- void MemCpyOptPass::eraseInstruction(Instruction *I) {
- MSSAU->removeMemoryAccess(I);
- I->eraseFromParent();
- }
- // Check for mod or ref of Loc between Start and End, excluding both boundaries.
- // Start and End must be in the same block.
- // If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
- // intrinsic and store it inside SkippedLifetimeStart.
- static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
- const MemoryUseOrDef *End,
- Instruction **SkippedLifetimeStart = nullptr) {
- assert(Start->getBlock() == End->getBlock() && "Only local supported");
- for (const MemoryAccess &MA :
- make_range(++Start->getIterator(), End->getIterator())) {
- Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
- if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
- SkippedLifetimeStart && !*SkippedLifetimeStart) {
- *SkippedLifetimeStart = I;
- continue;
- }
- return true;
- }
- }
- return false;
- }
- // Check for mod of Loc between Start and End, excluding both boundaries.
- // Start and End can be in different blocks.
- static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
- MemoryLocation Loc, const MemoryUseOrDef *Start,
- const MemoryUseOrDef *End) {
- if (isa<MemoryUse>(End)) {
- // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
- // Manually check read accesses between Start and End, if they are in the
- // same block, for clobbers. Otherwise assume Loc is clobbered.
- return Start->getBlock() != End->getBlock() ||
- any_of(
- make_range(std::next(Start->getIterator()), End->getIterator()),
- [&AA, Loc](const MemoryAccess &Acc) {
- if (isa<MemoryUse>(&Acc))
- return false;
- Instruction *AccInst =
- cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
- return isModSet(AA.getModRefInfo(AccInst, Loc));
- });
- }
- // TODO: Only walk until we hit Start.
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- End->getDefiningAccess(), Loc, AA);
- return !MSSA->dominates(Clobber, Start);
- }
- /// When scanning forward over instructions, we look for some other patterns to
- /// fold away. In particular, this looks for stores to neighboring locations of
- /// memory. If it sees enough consecutive ones, it attempts to merge them
- /// together into a memcpy/memset.
- Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
- Value *StartPtr,
- Value *ByteVal) {
- const DataLayout &DL = StartInst->getModule()->getDataLayout();
- // We can't track scalable types
- if (auto *SI = dyn_cast<StoreInst>(StartInst))
- if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
- return nullptr;
- // Okay, so we now have a single store that can be splatable. Scan to find
- // all subsequent stores of the same value to offset from the same pointer.
- // Join these together into ranges, so we can decide whether contiguous blocks
- // are stored.
- MemsetRanges Ranges(DL);
- BasicBlock::iterator BI(StartInst);
- // Keeps track of the last memory use or def before the insertion point for
- // the new memset. The new MemoryDef for the inserted memsets will be inserted
- // after MemInsertPoint. It points to either LastMemDef or to the last user
- // before the insertion point of the memset, if there are any such users.
- MemoryUseOrDef *MemInsertPoint = nullptr;
- // Keeps track of the last MemoryDef between StartInst and the insertion point
- // for the new memset. This will become the defining access of the inserted
- // memsets.
- MemoryDef *LastMemDef = nullptr;
- for (++BI; !BI->isTerminator(); ++BI) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
- MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
- // Calls that only access inaccessible memory do not block merging
- // accessible stores.
- if (auto *CB = dyn_cast<CallBase>(BI)) {
- if (CB->onlyAccessesInaccessibleMemory())
- continue;
- }
- if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
- // If the instruction is readnone, ignore it, otherwise bail out. We
- // don't even allow readonly here because we don't want something like:
- // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
- break;
- continue;
- }
- if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
- // If this is a store, see if we can merge it in.
- if (!NextStore->isSimple()) break;
- Value *StoredVal = NextStore->getValueOperand();
- // Don't convert stores of non-integral pointer types to memsets (which
- // stores integers).
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
- break;
- // We can't track ranges involving scalable types.
- if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
- break;
- // Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(StoredVal, DL);
- if (isa<UndefValue>(ByteVal) && StoredByte)
- ByteVal = StoredByte;
- if (ByteVal != StoredByte)
- break;
- // Check to see if this store is to a constant offset from the start ptr.
- std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
- if (!Offset)
- break;
- Ranges.addStore(*Offset, NextStore);
- } else {
- auto *MSI = cast<MemSetInst>(BI);
- if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
- !isa<ConstantInt>(MSI->getLength()))
- break;
- // Check to see if this store is to a constant offset from the start ptr.
- std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, MSI->getDest(), DL);
- if (!Offset)
- break;
- Ranges.addMemSet(*Offset, MSI);
- }
- }
- // If we have no ranges, then we just had a single store with nothing that
- // could be merged in. This is a very common case of course.
- if (Ranges.empty())
- return nullptr;
- // If we had at least one store that could be merged in, add the starting
- // store as well. We try to avoid this unless there is at least something
- // interesting as a small compile-time optimization.
- Ranges.addInst(0, StartInst);
- // If we create any memsets, we put it right before the first instruction that
- // isn't part of the memset block. This ensure that the memset is dominated
- // by any addressing instruction needed by the start of the block.
- IRBuilder<> Builder(&*BI);
- // Now that we have full information about ranges, loop over the ranges and
- // emit memset's for anything big enough to be worthwhile.
- Instruction *AMemSet = nullptr;
- for (const MemsetRange &Range : Ranges) {
- if (Range.TheStores.size() == 1) continue;
- // If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(DL))
- continue;
- // Otherwise, we do want to transform this! Create a new memset.
- // Get the starting pointer of the block.
- StartPtr = Range.StartPtr;
- AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
- Range.Alignment);
- AMemSet->mergeDIAssignID(Range.TheStores);
- LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
- : Range.TheStores) dbgs()
- << *SI << '\n';
- dbgs() << "With: " << *AMemSet << '\n');
- if (!Range.TheStores.empty())
- AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
- MemInsertPoint = NewDef;
- // Zap all the stores.
- for (Instruction *SI : Range.TheStores)
- eraseInstruction(SI);
- ++NumMemSetInfer;
- }
- return AMemSet;
- }
- // This method try to lift a store instruction before position P.
- // It will lift the store and its argument + that anything that
- // may alias with these.
- // The method returns true if it was successful.
- bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
- // If the store alias this position, early bail out.
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
- return false;
- // Keep track of the arguments of all instruction we plan to lift
- // so we can make sure to lift them as well if appropriate.
- DenseSet<Instruction*> Args;
- auto AddArg = [&](Value *Arg) {
- auto *I = dyn_cast<Instruction>(Arg);
- if (I && I->getParent() == SI->getParent()) {
- // Cannot hoist user of P above P
- if (I == P) return false;
- Args.insert(I);
- }
- return true;
- };
- if (!AddArg(SI->getPointerOperand()))
- return false;
- // Instruction to lift before P.
- SmallVector<Instruction *, 8> ToLift{SI};
- // Memory locations of lifted instructions.
- SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
- // Lifted calls.
- SmallVector<const CallBase *, 8> Calls;
- const MemoryLocation LoadLoc = MemoryLocation::get(LI);
- for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
- auto *C = &*I;
- // Make sure hoisting does not perform a store that was not guaranteed to
- // happen.
- if (!isGuaranteedToTransferExecutionToSuccessor(C))
- return false;
- bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
- bool NeedLift = false;
- if (Args.erase(C))
- NeedLift = true;
- else if (MayAlias) {
- NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
- return isModOrRefSet(AA->getModRefInfo(C, ML));
- });
- if (!NeedLift)
- NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
- return isModOrRefSet(AA->getModRefInfo(C, Call));
- });
- }
- if (!NeedLift)
- continue;
- if (MayAlias) {
- // Since LI is implicitly moved downwards past the lifted instructions,
- // none of them may modify its source.
- if (isModSet(AA->getModRefInfo(C, LoadLoc)))
- return false;
- else if (const auto *Call = dyn_cast<CallBase>(C)) {
- // If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA->getModRefInfo(P, Call)))
- return false;
- Calls.push_back(Call);
- } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
- // If we can't lift this before P, it's game over.
- auto ML = MemoryLocation::get(C);
- if (isModOrRefSet(AA->getModRefInfo(P, ML)))
- return false;
- MemLocs.push_back(ML);
- } else
- // We don't know how to lift this instruction.
- return false;
- }
- ToLift.push_back(C);
- for (Value *Op : C->operands())
- if (!AddArg(Op))
- return false;
- }
- // Find MSSA insertion point. Normally P will always have a corresponding
- // memory access before which we can insert. However, with non-standard AA
- // pipelines, there may be a mismatch between AA and MSSA, in which case we
- // will scan for a memory access before P. In either case, we know for sure
- // that at least the load will have a memory access.
- // TODO: Simplify this once P will be determined by MSSA, in which case the
- // discrepancy can no longer occur.
- MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
- MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
- } else {
- const Instruction *ConstP = P;
- for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
- ++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- MemInsertPoint = MA;
- break;
- }
- }
- }
- // We made it, we need to lift.
- for (auto *I : llvm::reverse(ToLift)) {
- LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
- I->moveBefore(P);
- assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
- MSSAU->moveAfter(MA, MemInsertPoint);
- MemInsertPoint = MA;
- }
- }
- return true;
- }
- bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
- const DataLayout &DL,
- BasicBlock::iterator &BBI) {
- if (!LI->isSimple() || !LI->hasOneUse() ||
- LI->getParent() != SI->getParent())
- return false;
- auto *T = LI->getType();
- // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
- // the corresponding libcalls are not available.
- // TODO: We should really distinguish between libcall availability and
- // our ability to introduce intrinsics.
- if (T->isAggregateType() &&
- (EnableMemCpyOptWithoutLibcalls ||
- (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
- MemoryLocation LoadLoc = MemoryLocation::get(LI);
- // We use alias analysis to check if an instruction may store to
- // the memory we load from in between the load and the store. If
- // such an instruction is found, we try to promote there instead
- // of at the store position.
- // TODO: Can use MSSA for this.
- Instruction *P = SI;
- for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
- if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
- P = &I;
- break;
- }
- }
- // We found an instruction that may write to the loaded memory.
- // We can try to promote at this position instead of the store
- // position if nothing aliases the store memory after this and the store
- // destination is not in the range.
- if (P && P != SI) {
- if (!moveUp(SI, P, LI))
- P = nullptr;
- }
- // If a valid insertion position is found, then we can promote
- // the load/store pair to a memcpy.
- if (P) {
- // If we load from memory that may alias the memory we store to,
- // memmove must be used to preserve semantic. If not, memcpy can
- // be used. Also, if we load from constant memory, memcpy can be used
- // as the constant memory won't be modified.
- bool UseMemMove = false;
- if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
- UseMemMove = true;
- uint64_t Size = DL.getTypeStoreSize(T);
- IRBuilder<> Builder(P);
- Instruction *M;
- if (UseMemMove)
- M = Builder.CreateMemMove(
- SI->getPointerOperand(), SI->getAlign(),
- LI->getPointerOperand(), LI->getAlign(), Size);
- else
- M = Builder.CreateMemCpy(
- SI->getPointerOperand(), SI->getAlign(),
- LI->getPointerOperand(), LI->getAlign(), Size);
- M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
- LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
- << *M << "\n");
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- // Make sure we do not invalidate the iterator.
- BBI = M->getIterator();
- return true;
- }
- }
- // Detect cases where we're performing call slot forwarding, but
- // happen to be using a load-store pair to implement it, rather than
- // a memcpy.
- BatchAAResults BAA(*AA);
- auto GetCall = [&]() -> CallInst * {
- // We defer this expensive clobber walk until the cheap checks
- // have been done on the source inside performCallSlotOptzn.
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
- return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- return nullptr;
- };
- bool Changed = performCallSlotOptzn(
- LI, SI, SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
- if (Changed) {
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- return true;
- }
- return false;
- }
- bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (!SI->isSimple()) return false;
- // Avoid merging nontemporal stores since the resulting
- // memcpy/memset would not be able to preserve the nontemporal hint.
- // In theory we could teach how to propagate the !nontemporal metadata to
- // memset calls. However, that change would force the backend to
- // conservatively expand !nontemporal memset calls back to sequences of
- // store instructions (effectively undoing the merging).
- if (SI->getMetadata(LLVMContext::MD_nontemporal))
- return false;
- const DataLayout &DL = SI->getModule()->getDataLayout();
- Value *StoredVal = SI->getValueOperand();
- // Not all the transforms below are correct for non-integral pointers, bail
- // until we've audited the individual pieces.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
- return false;
- // Load to store forwarding can be interpreted as memcpy.
- if (auto *LI = dyn_cast<LoadInst>(StoredVal))
- return processStoreOfLoad(SI, LI, DL, BBI);
- // The following code creates memset intrinsics out of thin air. Don't do
- // this if the corresponding libfunc is not available.
- // TODO: We should really distinguish between libcall availability and
- // our ability to introduce intrinsics.
- if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
- return false;
- // There are two cases that are interesting for this code to handle: memcpy
- // and memset. Right now we only handle memset.
- // Ensure that the value being stored is something that can be memset'able a
- // byte at a time like "0" or "-1" or any width, as well as things like
- // 0xA0A0A0A0 and 0.0.
- auto *V = SI->getOperand(0);
- if (Value *ByteVal = isBytewiseValue(V, DL)) {
- if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
- ByteVal)) {
- BBI = I->getIterator(); // Don't invalidate iterator.
- return true;
- }
- // If we have an aggregate, we try to promote it to memset regardless
- // of opportunity for merging as it can expose optimization opportunities
- // in subsequent passes.
- auto *T = V->getType();
- if (T->isAggregateType()) {
- uint64_t Size = DL.getTypeStoreSize(T);
- IRBuilder<> Builder(SI);
- auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
- SI->getAlign());
- M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
- LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- // The newly inserted memset is immediately overwritten by the original
- // store, so we do not need to rename uses.
- auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- M, StoreDef->getDefiningAccess(), StoreDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
- eraseInstruction(SI);
- NumMemSetInfer++;
- // Make sure we do not invalidate the iterator.
- BBI = M->getIterator();
- return true;
- }
- }
- return false;
- }
- bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
- // See if there is another memset or store neighboring this memset which
- // allows us to widen out the memset to do a single larger store.
- if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
- if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
- MSI->getValue())) {
- BBI = I->getIterator(); // Don't invalidate iterator.
- return true;
- }
- return false;
- }
- /// Takes a memcpy and a call that it depends on,
- /// and checks for the possibility of a call slot optimization by having
- /// the call write its result directly into the destination of the memcpy.
- bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
- Instruction *cpyStore, Value *cpyDest,
- Value *cpySrc, TypeSize cpySize,
- Align cpyDestAlign, BatchAAResults &BAA,
- std::function<CallInst *()> GetC) {
- // The general transformation to keep in mind is
- //
- // call @func(..., src, ...)
- // memcpy(dest, src, ...)
- //
- // ->
- //
- // memcpy(dest, src, ...)
- // call @func(..., dest, ...)
- //
- // Since moving the memcpy is technically awkward, we additionally check that
- // src only holds uninitialized values at the moment of the call, meaning that
- // the memcpy can be discarded rather than moved.
- // We can't optimize scalable types.
- if (cpySize.isScalable())
- return false;
- // Require that src be an alloca. This simplifies the reasoning considerably.
- auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
- if (!srcAlloca)
- return false;
- ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
- if (!srcArraySize)
- return false;
- const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
- uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
- srcArraySize->getZExtValue();
- if (cpySize < srcSize)
- return false;
- CallInst *C = GetC();
- if (!C)
- return false;
- // Lifetime marks shouldn't be operated on.
- if (Function *F = C->getCalledFunction())
- if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
- return false;
- if (C->getParent() != cpyStore->getParent()) {
- LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
- return false;
- }
- MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
- MemoryLocation::get(cpyStore) :
- MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
- // Check that nothing touches the dest of the copy between
- // the call and the store/memcpy.
- Instruction *SkippedLifetimeStart = nullptr;
- if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
- LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
- return false;
- }
- // If we need to move a lifetime.start above the call, make sure that we can
- // actually do so. If the argument is bitcasted for example, we would have to
- // move the bitcast as well, which we don't handle.
- if (SkippedLifetimeStart) {
- auto *LifetimeArg =
- dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
- if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
- C->comesBefore(LifetimeArg))
- return false;
- }
- // Check that accessing the first srcSize bytes of dest will not cause a
- // trap. Otherwise the transform is invalid since it might cause a trap
- // to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
- DL, C, AC, DT)) {
- LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
- return false;
- }
- // Make sure that nothing can observe cpyDest being written early. There are
- // a number of cases to consider:
- // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
- // the transform.
- // 2. C itself may not access cpyDest (prior to the transform). This is
- // checked further below.
- // 3. If cpyDest is accessible to the caller of this function (potentially
- // captured and not based on an alloca), we need to ensure that we cannot
- // unwind between C and cpyStore. This is checked here.
- // 4. If cpyDest is potentially captured, there may be accesses to it from
- // another thread. In this case, we need to check that cpyStore is
- // guaranteed to be executed if C is. As it is a non-atomic access, it
- // renders accesses from other threads undefined.
- // TODO: This is currently not checked.
- if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
- LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
- return false;
- }
- // Check that dest points to memory that is at least as aligned as src.
- Align srcAlign = srcAlloca->getAlign();
- bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
- // If dest is not aligned enough and we can't increase its alignment then
- // bail out.
- if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
- LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
- return false;
- }
- // Check that src is not accessed except via the call and the memcpy. This
- // guarantees that it holds only undefined values when passed in (so the final
- // memcpy can be dropped), that it is not read or written between the call and
- // the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User *, 8> srcUseList(srcAlloca->users());
- while (!srcUseList.empty()) {
- User *U = srcUseList.pop_back_val();
- if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
- append_range(srcUseList, U->users());
- continue;
- }
- if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
- if (!G->hasAllZeroIndices())
- return false;
- append_range(srcUseList, U->users());
- continue;
- }
- if (const auto *IT = dyn_cast<IntrinsicInst>(U))
- if (IT->isLifetimeStartOrEnd())
- continue;
- if (U != C && U != cpyLoad)
- return false;
- }
- // Check whether src is captured by the called function, in which case there
- // may be further indirect uses of src.
- bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
- return U->stripPointerCasts() == cpySrc &&
- !C->doesNotCapture(C->getArgOperandNo(&U));
- });
- // If src is captured, then check whether there are any potential uses of
- // src through the captured pointer before the lifetime of src ends, either
- // due to a lifetime.end or a return from the function.
- if (SrcIsCaptured) {
- // Check that dest is not captured before/at the call. We have already
- // checked that src is not captured before it. If either had been captured,
- // then the call might be comparing the argument against the captured dest
- // or src pointer.
- Value *DestObj = getUnderlyingObject(cpyDest);
- if (!isIdentifiedFunctionLocal(DestObj) ||
- PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
- /* StoreCaptures */ true, C, DT,
- /* IncludeI */ true))
- return false;
- MemoryLocation SrcLoc =
- MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
- for (Instruction &I :
- make_range(++C->getIterator(), C->getParent()->end())) {
- // Lifetime of srcAlloca ends at lifetime.end.
- if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
- II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
- cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
- break;
- }
- // Lifetime of srcAlloca ends at return.
- if (isa<ReturnInst>(&I))
- break;
- // Ignore the direct read of src in the load.
- if (&I == cpyLoad)
- continue;
- // Check whether this instruction may mod/ref src through the captured
- // pointer (we have already any direct mod/refs in the loop above).
- // Also bail if we hit a terminator, as we don't want to scan into other
- // blocks.
- if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
- return false;
- }
- }
- // Since we're changing the parameter to the callsite, we need to make sure
- // that what would be the new parameter dominates the callsite.
- if (!DT->dominates(cpyDest, C)) {
- // Support moving a constant index GEP before the call.
- auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
- if (GEP && GEP->hasAllConstantIndices() &&
- DT->dominates(GEP->getPointerOperand(), C))
- GEP->moveBefore(C);
- else
- return false;
- }
- // In addition to knowing that the call does not access src in some
- // unexpected manner, for example via a global, which we deduce from
- // the use analysis, we also need to know that it does not sneakily
- // access dest. We rely on AA to figure this out for us.
- MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
- ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
- // If necessary, perform additional analysis.
- if (isModOrRefSet(MR))
- MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
- if (isModOrRefSet(MR))
- return false;
- // We can't create address space casts here because we don't know if they're
- // safe for the target.
- if (cpySrc->getType()->getPointerAddressSpace() !=
- cpyDest->getType()->getPointerAddressSpace())
- return false;
- for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
- if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
- cpySrc->getType()->getPointerAddressSpace() !=
- C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
- return false;
- // All the checks have passed, so do the transformation.
- bool changedArgument = false;
- for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
- if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
- Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
- : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
- cpyDest->getName(), C);
- changedArgument = true;
- if (C->getArgOperand(ArgI)->getType() == Dest->getType())
- C->setArgOperand(ArgI, Dest);
- else
- C->setArgOperand(ArgI, CastInst::CreatePointerCast(
- Dest, C->getArgOperand(ArgI)->getType(),
- Dest->getName(), C));
- }
- if (!changedArgument)
- return false;
- // If the destination wasn't sufficiently aligned then increase its alignment.
- if (!isDestSufficientlyAligned) {
- assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
- cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
- }
- if (SkippedLifetimeStart) {
- SkippedLifetimeStart->moveBefore(C);
- MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
- MSSA->getMemoryAccess(C));
- }
- // Update AA metadata
- // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
- // handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_access_group};
- combineMetadata(C, cpyLoad, KnownIDs, true);
- if (cpyLoad != cpyStore)
- combineMetadata(C, cpyStore, KnownIDs, true);
- ++NumCallSlot;
- return true;
- }
- /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
- /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
- bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
- MemCpyInst *MDep,
- BatchAAResults &BAA) {
- // We can only transforms memcpy's where the dest of one is the source of the
- // other.
- if (M->getSource() != MDep->getDest() || MDep->isVolatile())
- return false;
- // If dep instruction is reading from our current input, then it is a noop
- // transfer and substituting the input won't change this instruction. Just
- // ignore the input and let someone else zap MDep. This handles cases like:
- // memcpy(a <- a)
- // memcpy(b <- a)
- if (M->getSource() == MDep->getSource())
- return false;
- // Second, the length of the memcpy's must be the same, or the preceding one
- // must be larger than the following one.
- if (MDep->getLength() != M->getLength()) {
- auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
- auto *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
- return false;
- }
- // Verify that the copied-from memory doesn't change in between the two
- // transfers. For example, in:
- // memcpy(a <- b)
- // *b = 42;
- // memcpy(c <- a)
- // It would be invalid to transform the second memcpy into memcpy(c <- b).
- //
- // TODO: If the code between M and MDep is transparent to the destination "c",
- // then we could still perform the xform by moving M up to the first memcpy.
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
- return false;
- // If the dest of the second might alias the source of the first, then the
- // source and dest might overlap. In addition, if the source of the first
- // points to constant memory, they won't overlap by definition. Otherwise, we
- // still want to eliminate the intermediate value, but we have to generate a
- // memmove instead of memcpy.
- bool UseMemMove = false;
- if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
- UseMemMove = true;
- // If all checks passed, then we can transform M.
- LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
- << *MDep << '\n' << *M << '\n');
- // TODO: Is this worth it if we're creating a less aligned memcpy? For
- // example we could be moving from movaps -> movq on x86.
- IRBuilder<> Builder(M);
- Instruction *NewM;
- if (UseMemMove)
- NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
- else if (isa<MemCpyInlineInst>(M)) {
- // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
- // never allowed since that would allow the latter to be lowered as a call
- // to an external function.
- NewM = Builder.CreateMemCpyInline(
- M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
- MDep->getSourceAlign(), M->getLength(), M->isVolatile());
- } else
- NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
- NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- // Remove the instruction we're replacing.
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- /// We've found that the (upward scanning) memory dependence of \p MemCpy is
- /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
- /// weren't copied over by \p MemCpy.
- ///
- /// In other words, transform:
- /// \code
- /// memset(dst, c, dst_size);
- /// memcpy(dst, src, src_size);
- /// \endcode
- /// into:
- /// \code
- /// memcpy(dst, src, src_size);
- /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
- /// \endcode
- bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
- MemSetInst *MemSet,
- BatchAAResults &BAA) {
- // We can only transform memset/memcpy with the same destination.
- if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
- return false;
- // Check that src and dst of the memcpy aren't the same. While memcpy
- // operands cannot partially overlap, exact equality is allowed.
- if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
- return false;
- // We know that dst up to src_size is not written. We now need to make sure
- // that dst up to dst_size is not accessed. (If we did not move the memset,
- // checking for reads would be sufficient.)
- if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
- MSSA->getMemoryAccess(MemSet),
- MSSA->getMemoryAccess(MemCpy)))
- return false;
- // Use the same i8* dest as the memcpy, killing the memset dest if different.
- Value *Dest = MemCpy->getRawDest();
- Value *DestSize = MemSet->getLength();
- Value *SrcSize = MemCpy->getLength();
- if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
- return false;
- // If the sizes are the same, simply drop the memset instead of generating
- // a replacement with zero size.
- if (DestSize == SrcSize) {
- eraseInstruction(MemSet);
- return true;
- }
- // By default, create an unaligned memset.
- Align Alignment = Align(1);
- // If Dest is aligned, and SrcSize is constant, use the minimum alignment
- // of the sum.
- const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
- MemCpy->getDestAlign().valueOrOne());
- if (DestAlign > 1)
- if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
- Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
- IRBuilder<> Builder(MemCpy);
- // If the sizes have different types, zext the smaller one.
- if (DestSize->getType() != SrcSize->getType()) {
- if (DestSize->getType()->getIntegerBitWidth() >
- SrcSize->getType()->getIntegerBitWidth())
- SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
- else
- DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
- }
- Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
- Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
- Value *MemsetLen = Builder.CreateSelect(
- Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- unsigned DestAS = Dest->getType()->getPointerAddressSpace();
- Instruction *NewMemSet = Builder.CreateMemSet(
- Builder.CreateGEP(
- Builder.getInt8Ty(),
- Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
- SrcSize),
- MemSet->getOperand(1), MemsetLen, Alignment);
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
- "MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- eraseInstruction(MemSet);
- return true;
- }
- /// Determine whether the instruction has undefined content for the given Size,
- /// either because it was freshly alloca'd or started its lifetime.
- static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
- MemoryDef *Def, Value *Size) {
- if (MSSA->isLiveOnEntryDef(Def))
- return isa<AllocaInst>(getUnderlyingObject(V));
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
- if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
- if (AA.isMustAlias(V, II->getArgOperand(1)) &&
- LTSize->getZExtValue() >= CSize->getZExtValue())
- return true;
- }
- // If the lifetime.start covers a whole alloca (as it almost always
- // does) and we're querying a pointer based on that alloca, then we know
- // the memory is definitely undef, regardless of how exactly we alias.
- // The size also doesn't matter, as an out-of-bounds access would be UB.
- if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
- if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
- const DataLayout &DL = Alloca->getModule()->getDataLayout();
- if (std::optional<TypeSize> AllocaSize =
- Alloca->getAllocationSize(DL))
- if (*AllocaSize == LTSize->getValue())
- return true;
- }
- }
- }
- }
- return false;
- }
- /// Transform memcpy to memset when its source was just memset.
- /// In other words, turn:
- /// \code
- /// memset(dst1, c, dst1_size);
- /// memcpy(dst2, dst1, dst2_size);
- /// \endcode
- /// into:
- /// \code
- /// memset(dst1, c, dst1_size);
- /// memset(dst2, c, dst2_size);
- /// \endcode
- /// When dst2_size <= dst1_size.
- bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
- MemSetInst *MemSet,
- BatchAAResults &BAA) {
- // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
- // memcpying from the same address. Otherwise it is hard to reason about.
- if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
- return false;
- Value *MemSetSize = MemSet->getLength();
- Value *CopySize = MemCpy->getLength();
- if (MemSetSize != CopySize) {
- // Make sure the memcpy doesn't read any more than what the memset wrote.
- // Don't worry about sizes larger than i64.
- // A known memset size is required.
- auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
- if (!CMemSetSize)
- return false;
- // A known memcpy size is also required.
- auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
- if (!CCopySize)
- return false;
- if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
- // If the memcpy is larger than the memset, but the memory was undef prior
- // to the memset, we can just ignore the tail. Technically we're only
- // interested in the bytes from MemSetSize..CopySize here, but as we can't
- // easily represent this location, we use the full 0..CopySize range.
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- bool CanReduceSize = false;
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- if (!CanReduceSize)
- return false;
- CopySize = MemSetSize;
- }
- }
- IRBuilder<> Builder(MemCpy);
- Instruction *NewM =
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
- CopySize, MemCpy->getDestAlign());
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- return true;
- }
- /// Perform simplification of memcpy's. If we have memcpy A
- /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
- /// B to be a memcpy from X to Z (or potentially a memmove, depending on
- /// circumstances). This allows later passes to remove the first memcpy
- /// altogether.
- bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
- // We can only optimize non-volatile memcpy's.
- if (M->isVolatile()) return false;
- // If the source and destination of the memcpy are the same, then zap it.
- if (M->getSource() == M->getDest()) {
- ++BBI;
- eraseInstruction(M);
- return true;
- }
- // If copying from a constant, try to turn the memcpy into a memset.
- if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
- M->getModule()->getDataLayout())) {
- IRBuilder<> Builder(M);
- Instruction *NewM = Builder.CreateMemSet(
- M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- BatchAAResults BAA(*AA);
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
- MemoryAccess *AnyClobber = MA->getDefiningAccess();
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
- const MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- // The memcpy most post-dom the memset, so limit this to the same basic
- // block. A non-local generalization is likely not worthwhile.
- if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
- if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
- if (DestClobber->getBlock() == M->getParent())
- if (processMemSetMemCpyDependence(M, MDep, BAA))
- return true;
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M), BAA);
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
- if (Instruction *MI = MD->getMemoryInst()) {
- if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()),
- M->getDestAlign().valueOrOne(), BAA,
- [C]() -> CallInst * { return C; })) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep, BAA);
- if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
- if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
- LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
- }
- if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
- LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- return false;
- }
- /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
- /// not to alias.
- bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- // See if the source could be modified by this memmove potentially.
- if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
- return false;
- LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
- << "\n");
- // If not, then we know we can transform this.
- Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
- Intrinsic::memcpy, ArgTys));
- // For MemorySSA nothing really changes (except that memcpy may imply stricter
- // aliasing guarantees).
- ++NumMoveToCpy;
- return true;
- }
- /// This is called on every byval argument in call sites.
- bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
- const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
- // Find out what feeds this byval argument.
- Value *ByValArg = CB.getArgOperand(ArgNo);
- Type *ByValTy = CB.getParamByValType(ArgNo);
- TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
- MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
- MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
- if (!CallAccess)
- return false;
- MemCpyInst *MDep = nullptr;
- BatchAAResults BAA(*AA);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- CallAccess->getDefiningAccess(), Loc, BAA);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
- // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
- // a memcpy, see if we can byval from the source of the memcpy instead of the
- // result.
- if (!MDep || MDep->isVolatile() ||
- ByValArg->stripPointerCasts() != MDep->getDest())
- return false;
- // The length of the memcpy must be larger or equal to the size of the byval.
- auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (!C1 || !TypeSize::isKnownGE(
- TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
- return false;
- // Get the alignment of the byval. If the call doesn't specify the alignment,
- // then it is some target specific value that we can't know.
- MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
- if (!ByValAlign) return false;
- // If it is greater than the memcpy, then we check to see if we can force the
- // source of the memcpy to the alignment we need. If we fail, we bail out.
- MaybeAlign MemDepAlign = MDep->getSourceAlign();
- if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
- DT) < *ByValAlign)
- return false;
- // The address space of the memcpy source must match the byval argument
- if (MDep->getSource()->getType()->getPointerAddressSpace() !=
- ByValArg->getType()->getPointerAddressSpace())
- return false;
- // Verify that the copied-from memory doesn't change in between the memcpy and
- // the byval call.
- // memcpy(a <- b)
- // *b = 42;
- // foo(*a)
- // It would be invalid to transform the second memcpy into foo(*b).
- if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
- return false;
- Value *TmpCast = MDep->getSource();
- if (MDep->getSource()->getType() != ByValArg->getType()) {
- BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
- "tmpcast", &CB);
- // Set the tmpcast's DebugLoc to MDep's
- TmpBitCast->setDebugLoc(MDep->getDebugLoc());
- TmpCast = TmpBitCast;
- }
- LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
- << " " << *MDep << "\n"
- << " " << CB << "\n");
- // Otherwise we're good! Update the byval argument.
- CB.setArgOperand(ArgNo, TmpCast);
- ++NumMemCpyInstr;
- return true;
- }
- /// Executes one iteration of MemCpyOptPass.
- bool MemCpyOptPass::iterateOnFunction(Function &F) {
- bool MadeChange = false;
- // Walk all instruction in the function.
- for (BasicBlock &BB : F) {
- // Skip unreachable blocks. For example processStore assumes that an
- // instruction in a BB can't be dominated by a later instruction in the
- // same BB (which is a scenario that can happen for an unreachable BB that
- // has itself as a predecessor).
- if (!DT->isReachableFromEntry(&BB))
- continue;
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- // Avoid invalidating the iterator.
- Instruction *I = &*BI++;
- bool RepeatInstruction = false;
- if (auto *SI = dyn_cast<StoreInst>(I))
- MadeChange |= processStore(SI, BI);
- else if (auto *M = dyn_cast<MemSetInst>(I))
- RepeatInstruction = processMemSet(M, BI);
- else if (auto *M = dyn_cast<MemCpyInst>(I))
- RepeatInstruction = processMemCpy(M, BI);
- else if (auto *M = dyn_cast<MemMoveInst>(I))
- RepeatInstruction = processMemMove(M);
- else if (auto *CB = dyn_cast<CallBase>(I)) {
- for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
- if (CB->isByValArgument(i))
- MadeChange |= processByValArgument(*CB, i);
- }
- // Reprocess the instruction if desired.
- if (RepeatInstruction) {
- if (BI != BB.begin())
- --BI;
- MadeChange = true;
- }
- }
- }
- return MadeChange;
- }
- PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto *AA = &AM.getResult<AAManager>(F);
- auto *AC = &AM.getResult<AssumptionAnalysis>(F);
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
- if (!MadeChange)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- PA.preserve<MemorySSAAnalysis>();
- return PA;
- }
- bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- AliasAnalysis *AA_, AssumptionCache *AC_,
- DominatorTree *DT_, MemorySSA *MSSA_) {
- bool MadeChange = false;
- TLI = TLI_;
- AA = AA_;
- AC = AC_;
- DT = DT_;
- MSSA = MSSA_;
- MemorySSAUpdater MSSAU_(MSSA_);
- MSSAU = &MSSAU_;
- while (true) {
- if (!iterateOnFunction(F))
- break;
- MadeChange = true;
- }
- if (VerifyMemorySSA)
- MSSA_->verifyMemorySSA();
- return MadeChange;
- }
- /// This is the main transformation entry point for a function.
- bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
- }
|