123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431 |
- //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- //! \file
- //! This pass performs merges of loads and stores on both sides of a
- // diamond (hammock). It hoists the loads and sinks the stores.
- //
- // The algorithm iteratively hoists two loads to the same address out of a
- // diamond (hammock) and merges them into a single load in the header. Similar
- // it sinks and merges two stores to the tail block (footer). The algorithm
- // iterates over the instructions of one side of the diamond and attempts to
- // find a matching load/store on the other side. New tail/footer block may be
- // insterted if the tail/footer block has more predecessors (not only the two
- // predecessors that are forming the diamond). It hoists / sinks when it thinks
- // it safe to do so. This optimization helps with eg. hiding load latencies,
- // triggering if-conversion, and reducing static code size.
- //
- // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
- //
- //===----------------------------------------------------------------------===//
- //
- //
- // Example:
- // Diamond shaped code before merge:
- //
- // header:
- // br %cond, label %if.then, label %if.else
- // + +
- // + +
- // + +
- // if.then: if.else:
- // %lt = load %addr_l %le = load %addr_l
- // <use %lt> <use %le>
- // <...> <...>
- // store %st, %addr_s store %se, %addr_s
- // br label %if.end br label %if.end
- // + +
- // + +
- // + +
- // if.end ("footer"):
- // <...>
- //
- // Diamond shaped code after merge:
- //
- // header:
- // %l = load %addr_l
- // br %cond, label %if.then, label %if.else
- // + +
- // + +
- // + +
- // if.then: if.else:
- // <use %l> <use %l>
- // <...> <...>
- // br label %if.end br label %if.end
- // + +
- // + +
- // + +
- // if.end ("footer"):
- // %s.sink = phi [%st, if.then], [%se, if.else]
- // <...>
- // store %s.sink, %addr_s
- // <...>
- //
- //
- //===----------------------- TODO -----------------------------------------===//
- //
- // 1) Generalize to regions other than diamonds
- // 2) Be more aggressive merging memory operations
- // Note that both changes require register pressure control
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/Analysis/AliasAnalysis.h"
- #include "llvm/Analysis/CFG.h"
- #include "llvm/Analysis/GlobalsModRef.h"
- #include "llvm/Analysis/Loads.h"
- #include "llvm/Analysis/ValueTracking.h"
- #include "llvm/IR/Metadata.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Transforms/Scalar.h"
- #include "llvm/Transforms/Utils/BasicBlockUtils.h"
- using namespace llvm;
- #define DEBUG_TYPE "mldst-motion"
- namespace {
- //===----------------------------------------------------------------------===//
- // MergedLoadStoreMotion Pass
- //===----------------------------------------------------------------------===//
- class MergedLoadStoreMotion {
- AliasAnalysis *AA = nullptr;
- // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
- // where Size0 and Size1 are the #instructions on the two sides of
- // the diamond. The constant chosen here is arbitrary. Compiler Time
- // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
- const int MagicCompileTimeControl = 250;
- const bool SplitFooterBB;
- public:
- MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
- bool run(Function &F, AliasAnalysis &AA);
- private:
- BasicBlock *getDiamondTail(BasicBlock *BB);
- bool isDiamondHead(BasicBlock *BB);
- // Routines for sinking stores
- StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
- PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
- bool isStoreSinkBarrierInRange(const Instruction &Start,
- const Instruction &End, MemoryLocation Loc);
- bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
- void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
- StoreInst *ElseInst);
- bool mergeStores(BasicBlock *BB);
- };
- } // end anonymous namespace
- ///
- /// Return tail block of a diamond.
- ///
- BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
- assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
- return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
- }
- ///
- /// True when BB is the head of a diamond (hammock)
- ///
- bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
- if (!BB)
- return false;
- auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isConditional())
- return false;
- BasicBlock *Succ0 = BI->getSuccessor(0);
- BasicBlock *Succ1 = BI->getSuccessor(1);
- if (!Succ0->getSinglePredecessor())
- return false;
- if (!Succ1->getSinglePredecessor())
- return false;
- BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
- BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
- // Ignore triangles.
- if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
- return false;
- return true;
- }
- ///
- /// True when instruction is a sink barrier for a store
- /// located in Loc
- ///
- /// Whenever an instruction could possibly read or modify the
- /// value being stored or protect against the store from
- /// happening it is considered a sink barrier.
- ///
- bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
- const Instruction &End,
- MemoryLocation Loc) {
- for (const Instruction &Inst :
- make_range(Start.getIterator(), End.getIterator()))
- if (Inst.mayThrow())
- return true;
- return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef);
- }
- ///
- /// Check if \p BB contains a store to the same address as \p SI
- ///
- /// \return The store in \p when it is safe to sink. Otherwise return Null.
- ///
- StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
- StoreInst *Store0) {
- LLVM_DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
- BasicBlock *BB0 = Store0->getParent();
- for (Instruction &Inst : reverse(*BB1)) {
- auto *Store1 = dyn_cast<StoreInst>(&Inst);
- if (!Store1)
- continue;
- MemoryLocation Loc0 = MemoryLocation::get(Store0);
- MemoryLocation Loc1 = MemoryLocation::get(Store1);
- if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
- !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
- !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
- return Store1;
- }
- }
- return nullptr;
- }
- ///
- /// Create a PHI node in BB for the operands of S0 and S1
- ///
- PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
- StoreInst *S1) {
- // Create a phi if the values mismatch.
- Value *Opd1 = S0->getValueOperand();
- Value *Opd2 = S1->getValueOperand();
- if (Opd1 == Opd2)
- return nullptr;
- auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- &BB->front());
- NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
- NewPN->addIncoming(Opd1, S0->getParent());
- NewPN->addIncoming(Opd2, S1->getParent());
- return NewPN;
- }
- ///
- /// Check if 2 stores can be sunk together with corresponding GEPs
- ///
- bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0,
- StoreInst *S1) const {
- auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
- auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
- return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
- (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
- (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0);
- }
- ///
- /// Merge two stores to same address and sink into \p BB
- ///
- /// Also sinks GEP instruction computing the store address
- ///
- void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
- StoreInst *S1) {
- // Only one definition?
- auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
- auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
- LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
- // Hoist the instruction.
- BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
- // Intersect optional metadata.
- S0->andIRFlags(S1);
- S0->dropUnknownNonDebugMetadata();
- // Create the new store to be inserted at the join point.
- StoreInst *SNew = cast<StoreInst>(S0->clone());
- Instruction *ANew = A0->clone();
- SNew->insertBefore(&*InsertPt);
- ANew->insertBefore(SNew);
- assert(S0->getParent() == A0->getParent());
- assert(S1->getParent() == A1->getParent());
- // New PHI operand? Use it.
- if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
- SNew->setOperand(0, NewPN);
- S0->eraseFromParent();
- S1->eraseFromParent();
- A0->replaceAllUsesWith(ANew);
- A0->eraseFromParent();
- A1->replaceAllUsesWith(ANew);
- A1->eraseFromParent();
- }
- ///
- /// True when two stores are equivalent and can sink into the footer
- ///
- /// Starting from a diamond head block, iterate over the instructions in one
- /// successor block and try to match a store in the second successor.
- ///
- bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) {
- bool MergedStores = false;
- BasicBlock *TailBB = getDiamondTail(HeadBB);
- BasicBlock *SinkBB = TailBB;
- assert(SinkBB && "Footer of a diamond cannot be empty");
- succ_iterator SI = succ_begin(HeadBB);
- assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors");
- BasicBlock *Pred0 = *SI;
- ++SI;
- assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor");
- BasicBlock *Pred1 = *SI;
- // tail block of a diamond/hammock?
- if (Pred0 == Pred1)
- return false; // No.
- // bail out early if we can not merge into the footer BB
- if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3))
- return false;
- // #Instructions in Pred1 for Compile Time Control
- auto InstsNoDbg = Pred1->instructionsWithoutDebug();
- int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
- int NStores = 0;
- for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
- RBI != RBE;) {
- Instruction *I = &*RBI;
- ++RBI;
- // Don't sink non-simple (atomic, volatile) stores.
- auto *S0 = dyn_cast<StoreInst>(I);
- if (!S0 || !S0->isSimple())
- continue;
- ++NStores;
- if (NStores * Size1 >= MagicCompileTimeControl)
- break;
- if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
- if (!canSinkStoresAndGEPs(S0, S1))
- // Don't attempt to sink below stores that had to stick around
- // But after removal of a store and some of its feeding
- // instruction search again from the beginning since the iterator
- // is likely stale at this point.
- break;
- if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) {
- // We have more than 2 predecessors. Insert a new block
- // postdominating 2 predecessors we're going to sink from.
- SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split");
- if (!SinkBB)
- break;
- }
- MergedStores = true;
- sinkStoresAndGEPs(SinkBB, S0, S1);
- RBI = Pred0->rbegin();
- RBE = Pred0->rend();
- LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
- }
- }
- return MergedStores;
- }
- bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
- this->AA = &AA;
- bool Changed = false;
- LLVM_DEBUG(dbgs() << "Instruction Merger\n");
- // Merge unconditional branches, allowing PRE to catch more
- // optimization opportunities.
- // This loop doesn't care about newly inserted/split blocks
- // since they never will be diamond heads.
- for (BasicBlock &BB : make_early_inc_range(F))
- // Hoist equivalent loads and sink stores
- // outside diamonds when possible
- if (isDiamondHead(&BB))
- Changed |= mergeStores(&BB);
- return Changed;
- }
- namespace {
- class MergedLoadStoreMotionLegacyPass : public FunctionPass {
- const bool SplitFooterBB;
- public:
- static char ID; // Pass identification, replacement for typeid
- MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
- : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
- initializeMergedLoadStoreMotionLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- ///
- /// Run the transformation for each function
- ///
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- MergedLoadStoreMotion Impl(SplitFooterBB);
- return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
- }
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (!SplitFooterBB)
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- };
- char MergedLoadStoreMotionLegacyPass::ID = 0;
- } // anonymous namespace
- ///
- /// createMergedLoadStoreMotionPass - The public interface to this file.
- ///
- FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
- return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
- }
- INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
- INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
- INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
- PreservedAnalyses
- MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
- MergedLoadStoreMotion Impl(Options.SplitFooterBB);
- auto &AA = AM.getResult<AAManager>(F);
- if (!Impl.run(F, AA))
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- if (!Options.SplitFooterBB)
- PA.preserveSet<CFGAnalyses>();
- return PA;
- }
- void MergedLoadStoreMotionPass::printPipeline(
- raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
- static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
- OS, MapClassName2PassName);
- OS << "<";
- OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
- OS << ">";
- }
|