123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560 |
- //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- ///
- /// \file
- /// This file implements a pass that removes irreducible control flow.
- /// Irreducible control flow means multiple-entry loops, which this pass
- /// transforms to have a single entry.
- ///
- /// Note that LLVM has a generic pass that lowers irreducible control flow, but
- /// it linearizes control flow, turning diamonds into two triangles, which is
- /// both unnecessary and undesirable for WebAssembly.
- ///
- /// The big picture: We recursively process each "region", defined as a group
- /// of blocks with a single entry and no branches back to that entry. A region
- /// may be the entire function body, or the inner part of a loop, i.e., the
- /// loop's body without branches back to the loop entry. In each region we fix
- /// up multi-entry loops by adding a new block that can dispatch to each of the
- /// loop entries, based on the value of a label "helper" variable, and we
- /// replace direct branches to the entries with assignments to the label
- /// variable and a branch to the dispatch block. Then the dispatch block is the
- /// single entry in the loop containing the previous multiple entries. After
- /// ensuring all the loops in a region are reducible, we recurse into them. The
- /// total time complexity of this pass is:
- ///
- /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
- /// NumLoops * NumLoops)
- ///
- /// This pass is similar to what the Relooper [1] does. Both identify looping
- /// code that requires multiple entries, and resolve it in a similar way (in
- /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
- /// also that like the Relooper, we implement a "minimal" intervention: we only
- /// use the "label" helper for the blocks we absolutely must and no others. We
- /// also prioritize code size and do not duplicate code in order to resolve
- /// irreducibility. The graph algorithms for finding loops and entries and so
- /// forth are also similar to the Relooper. The main differences between this
- /// pass and the Relooper are:
- ///
- /// * We just care about irreducibility, so we just look at loops.
- /// * The Relooper emits structured control flow (with ifs etc.), while we
- /// emit a CFG.
- ///
- /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
- /// Proceedings of the ACM international conference companion on Object oriented
- /// programming systems languages and applications companion (SPLASH '11). ACM,
- /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
- /// http://doi.acm.org/10.1145/2048147.2048224
- ///
- //===----------------------------------------------------------------------===//
- #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
- #include "WebAssembly.h"
- #include "WebAssemblySubtarget.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/Support/Debug.h"
- using namespace llvm;
- #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
- namespace {
- using BlockVector = SmallVector<MachineBasicBlock *, 4>; // Sequence of block pointers; used below for work lists and for entries sorted by block number.
- using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; // Set of block pointers; code whose output depends on visiting order goes through getSortedEntries() first.
- static BlockVector getSortedEntries(const BlockSet &Entries) {
- BlockVector SortedEntries(Entries.begin(), Entries.end());
- llvm::sort(SortedEntries,
- [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
- auto ANum = A->getNumber();
- auto BNum = B->getNumber();
- return ANum < BNum;
- });
- return SortedEntries;
- }
- // Calculates reachability in a region. Ignores branches to blocks outside of
- // the region, and ignores branches to the region entry (for the case where
- // the region is the inner part of a loop).
- class ReachabilityGraph {
- public:
- ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
- : Entry(Entry), Blocks(Blocks) {
- #ifndef NDEBUG
- // The region must have a single entry.
- for (auto *MBB : Blocks) {
- if (MBB != Entry) {
- for (auto *Pred : MBB->predecessors()) {
- assert(inRegion(Pred));
- }
- }
- }
- #endif
- calculate();
- }
- bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
- assert(inRegion(From) && inRegion(To));
- auto I = Reachable.find(From);
- if (I == Reachable.end())
- return false;
- return I->second.count(To);
- }
- // "Loopers" are blocks that are in a loop. We detect these by finding blocks
- // that can reach themselves.
- const BlockSet &getLoopers() const { return Loopers; }
- // Get all blocks that are loop entries.
- const BlockSet &getLoopEntries() const { return LoopEntries; }
- // Get all blocks that enter a particular loop from outside.
- const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
- assert(inRegion(LoopEntry));
- auto I = LoopEnterers.find(LoopEntry);
- assert(I != LoopEnterers.end());
- return I->second;
- }
- private:
- MachineBasicBlock *Entry;
- const BlockSet &Blocks;
- BlockSet Loopers, LoopEntries;
- DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
- bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
- // Maps a block to all the other blocks it can reach.
- DenseMap<MachineBasicBlock *, BlockSet> Reachable;
- void calculate() {
- // Reachability computation work list. Contains pairs of recent additions
- // (A, B) where we just added a link A => B.
- using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
- SmallVector<BlockPair, 4> WorkList;
- // Add all relevant direct branches.
- for (auto *MBB : Blocks) {
- for (auto *Succ : MBB->successors()) {
- if (Succ != Entry && inRegion(Succ)) {
- Reachable[MBB].insert(Succ);
- WorkList.emplace_back(MBB, Succ);
- }
- }
- }
- while (!WorkList.empty()) {
- MachineBasicBlock *MBB, *Succ;
- std::tie(MBB, Succ) = WorkList.pop_back_val();
- assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
- if (MBB != Entry) {
- // We recently added MBB => Succ, and that means we may have enabled
- // Pred => MBB => Succ.
- for (auto *Pred : MBB->predecessors()) {
- if (Reachable[Pred].insert(Succ).second) {
- WorkList.emplace_back(Pred, Succ);
- }
- }
- }
- }
- // Blocks that can return to themselves are in a loop.
- for (auto *MBB : Blocks) {
- if (canReach(MBB, MBB)) {
- Loopers.insert(MBB);
- }
- }
- assert(!Loopers.count(Entry));
- // Find the loop entries - loopers reachable from blocks not in that loop -
- // and those outside blocks that reach them, the "loop enterers".
- for (auto *Looper : Loopers) {
- for (auto *Pred : Looper->predecessors()) {
- // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
- // otherwise, it is a block that enters into the loop.
- if (!canReach(Looper, Pred)) {
- LoopEntries.insert(Looper);
- LoopEnterers[Looper].insert(Pred);
- }
- }
- }
- }
- };
- // Finds the blocks in a single-entry loop, given the loop entry and the
- // list of blocks that enter the loop.
- class LoopBlocks {
- public:
- LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
- : Entry(Entry), Enterers(Enterers) {
- calculate();
- }
- BlockSet &getBlocks() { return Blocks; }
- private:
- MachineBasicBlock *Entry;
- const BlockSet &Enterers;
- BlockSet Blocks;
- void calculate() {
- // Going backwards from the loop entry, if we ignore the blocks entering
- // from outside, we will traverse all the blocks in the loop.
- BlockVector WorkList;
- BlockSet AddedToWorkList;
- Blocks.insert(Entry);
- for (auto *Pred : Entry->predecessors()) {
- if (!Enterers.count(Pred)) {
- WorkList.push_back(Pred);
- AddedToWorkList.insert(Pred);
- }
- }
- while (!WorkList.empty()) {
- auto *MBB = WorkList.pop_back_val();
- assert(!Enterers.count(MBB));
- if (Blocks.insert(MBB).second) {
- for (auto *Pred : MBB->predecessors()) {
- if (AddedToWorkList.insert(Pred).second)
- WorkList.push_back(Pred);
- }
- }
- }
- }
- };
- class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly Fix Irreducible Control Flow";
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
- MachineFunction &MF);
- void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
- MachineFunction &MF, const ReachabilityGraph &Graph);
- public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
- };
- bool WebAssemblyFixIrreducibleControlFlow::processRegion(
- MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
- bool Changed = false;
- // Remove irreducibility before processing child loops, which may take
- // multiple iterations.
- while (true) {
- ReachabilityGraph Graph(Entry, Blocks);
- bool FoundIrreducibility = false;
- for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
- // Find mutual entries - all entries which can reach this one, and
- // are reached by it (that always includes LoopEntry itself). All mutual
- // entries must be in the same loop, so if we have more than one, then we
- // have irreducible control flow.
- //
- // (Note that we need to sort the entries here, as otherwise the order can
- // matter: being mutual is a symmetric relationship, and each set of
- // mutuals will be handled properly no matter which we see first. However,
- // there can be multiple disjoint sets of mutuals, and which we process
- // first changes the output.)
- //
- // Note that irreducibility may involve inner loops, e.g. imagine A
- // starts one loop, and it has B inside it which starts an inner loop.
- // If we add a branch from all the way on the outside to B, then in a
- // sense B is no longer an "inner" loop, semantically speaking. We will
- // fix that irreducibility by adding a block that dispatches to either
- // either A or B, so B will no longer be an inner loop in our output.
- // (A fancier approach might try to keep it as such.)
- //
- // Note that we still need to recurse into inner loops later, to handle
- // the case where the irreducibility is entirely nested - we would not
- // be able to identify that at this point, since the enclosing loop is
- // a group of blocks all of whom can reach each other. (We'll see the
- // irreducibility after removing branches to the top of that enclosing
- // loop.)
- BlockSet MutualLoopEntries;
- MutualLoopEntries.insert(LoopEntry);
- for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
- if (OtherLoopEntry != LoopEntry &&
- Graph.canReach(LoopEntry, OtherLoopEntry) &&
- Graph.canReach(OtherLoopEntry, LoopEntry)) {
- MutualLoopEntries.insert(OtherLoopEntry);
- }
- }
- if (MutualLoopEntries.size() > 1) {
- makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
- FoundIrreducibility = true;
- Changed = true;
- break;
- }
- }
- // Only go on to actually process the inner loops when we are done
- // removing irreducible control flow and changing the graph. Modifying
- // the graph as we go is possible, and that might let us avoid looking at
- // the already-fixed loops again if we are careful, but all that is
- // complex and bug-prone. Since irreducible loops are rare, just starting
- // another iteration is best.
- if (FoundIrreducibility) {
- continue;
- }
- for (auto *LoopEntry : Graph.getLoopEntries()) {
- LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
- // Each of these calls to processRegion may change the graph, but are
- // guaranteed not to interfere with each other. The only changes we make
- // to the graph are to add blocks on the way to a loop entry. As the
- // loops are disjoint, that means we may only alter branches that exit
- // another loop, which are ignored when recursing into that other loop
- // anyhow.
- if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
- Changed = true;
- }
- }
- return Changed;
- }
- }
- // Given a set of entries to a single loop, create a single entry for that
- // loop by creating a dispatch block for them, routing control flow using
- // a helper variable. Also updates Blocks with any new blocks created, so
- // that we properly track all the blocks in the region. But this does not update
- // ReachabilityGraph; this will be updated in the caller of this function as
- // needed.
- void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
- BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
- const ReachabilityGraph &Graph) {
- assert(Entries.size() >= 2);
- // Sort the entries to ensure a deterministic build.
- BlockVector SortedEntries = getSortedEntries(Entries);
- #ifndef NDEBUG
- for (auto *Block : SortedEntries)
- assert(Block->getNumber() != -1);
- if (SortedEntries.size() > 1) {
- for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
- ++I) {
- auto ANum = (*I)->getNumber();
- auto BNum = (*(std::next(I)))->getNumber();
- assert(ANum != BNum);
- }
- }
- #endif
- // Create a dispatch block which will contain a jump table to the entries.
- MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(), Dispatch);
- Blocks.insert(Dispatch);
- // Add the jump table.
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineInstrBuilder MIB =
- BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
- // Add the register which will be used to tell the jump table which block to
- // jump to.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- MIB.addReg(Reg);
- // Compute the indices in the superheader, one for each bad block, and
- // add them as successors.
- DenseMap<MachineBasicBlock *, unsigned> Indices;
- for (auto *Entry : SortedEntries) {
- auto Pair = Indices.insert(std::make_pair(Entry, 0));
- assert(Pair.second);
- unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
- Pair.first->second = Index;
- MIB.addMBB(Entry);
- Dispatch->addSuccessor(Entry);
- }
- // Rewrite the problematic successors for every block that wants to reach
- // the bad blocks. For simplicity, we just introduce a new block for every
- // edge we need to rewrite. (Fancier things are possible.)
- BlockVector AllPreds;
- for (auto *Entry : SortedEntries) {
- for (auto *Pred : Entry->predecessors()) {
- if (Pred != Dispatch) {
- AllPreds.push_back(Pred);
- }
- }
- }
- // This set stores predecessors within this loop.
- DenseSet<MachineBasicBlock *> InLoop;
- for (auto *Pred : AllPreds) {
- for (auto *Entry : Pred->successors()) {
- if (!Entries.count(Entry))
- continue;
- if (Graph.canReach(Entry, Pred)) {
- InLoop.insert(Pred);
- break;
- }
- }
- }
- // Record if each entry has a layout predecessor. This map stores
- // <<loop entry, Predecessor is within the loop?>, layout predecessor>
- DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
- EntryToLayoutPred;
- for (auto *Pred : AllPreds) {
- bool PredInLoop = InLoop.count(Pred);
- for (auto *Entry : Pred->successors())
- if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
- EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
- }
- // We need to create at most two routing blocks per entry: one for
- // predecessors outside the loop and one for predecessors inside the loop.
- // This map stores
- // <<loop entry, Predecessor is within the loop?>, routing block>
- DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
- Map;
- for (auto *Pred : AllPreds) {
- bool PredInLoop = InLoop.count(Pred);
- for (auto *Entry : Pred->successors()) {
- if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
- continue;
- // If there exists a layout predecessor of this entry and this predecessor
- // is not that, we rather create a routing block after that layout
- // predecessor to save a branch.
- if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
- if (OtherPred != Pred)
- continue;
- // This is a successor we need to rewrite.
- MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
- MF.insert(Pred->isLayoutSuccessor(Entry)
- ? MachineFunction::iterator(Entry)
- : MF.end(),
- Routing);
- Blocks.insert(Routing);
- // Set the jump table's register of the index of the block we wish to
- // jump to, and jump to the jump table.
- BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
- .addImm(Indices[Entry]);
- BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
- Routing->addSuccessor(Dispatch);
- Map[{Entry, PredInLoop}] = Routing;
- }
- }
- for (auto *Pred : AllPreds) {
- bool PredInLoop = InLoop.count(Pred);
- // Remap the terminator operands and the successor list.
- for (MachineInstr &Term : Pred->terminators())
- for (auto &Op : Term.explicit_uses())
- if (Op.isMBB() && Indices.count(Op.getMBB()))
- Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
- for (auto *Succ : Pred->successors()) {
- if (!Entries.count(Succ))
- continue;
- auto *Routing = Map[{Succ, PredInLoop}];
- Pred->replaceSuccessor(Succ, Routing);
- }
- }
- // Create a fake default label, because br_table requires one.
- MIB.addMBB(MIB.getInstr()
- ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
- .getMBB());
- }
- } // end anonymous namespace
- char WebAssemblyFixIrreducibleControlFlow::ID = 0; // Storage for the static ID member declared in the class; the constructor hands it to MachineFunctionPass.
- INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
- "Removes irreducible control flow", false, false) // Registers the pass under the DEBUG_TYPE string. NOTE(review): the two false flags are presumably "CFG only" and "is analysis" - confirm against the INITIALIZE_PASS definition.
- FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
- return new WebAssemblyFixIrreducibleControlFlow();
- }
- // Test whether the given register has an ARGUMENT def.
- static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
- for (const auto &Def : MRI.def_instructions(Reg))
- if (WebAssembly::isArgument(Def.getOpcode()))
- return true;
- return false;
- }
- // Add a register definition with IMPLICIT_DEFs for every register to cover for
- // register uses that don't have defs in every possible path.
- // TODO: This is fairly heavy-handed; find a better approach.
- static void addImplicitDefs(MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineBasicBlock &Entry = *MF.begin();
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- Register Reg = Register::index2VirtReg(I);
- // Skip unused registers.
- if (MRI.use_nodbg_empty(Reg))
- continue;
- // Skip registers that have an ARGUMENT definition.
- if (hasArgumentDef(Reg, MRI))
- continue;
- BuildMI(Entry, Entry.begin(), DebugLoc(),
- TII.get(WebAssembly::IMPLICIT_DEF), Reg);
- }
- // Move ARGUMENT_* instructions to the top of the entry block, so that their
- // liveness reflects the fact that these really are live-in values.
- for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
- if (WebAssembly::isArgument(MI.getOpcode())) {
- MI.removeFromParent();
- Entry.insert(Entry.begin(), &MI);
- }
- }
- }
- bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
- MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
- "********** Function: "
- << MF.getName() << '\n');
- // Start the recursive process on the entire function body.
- BlockSet AllBlocks;
- for (auto &MBB : MF) {
- AllBlocks.insert(&MBB);
- }
- if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
- // We rewrote part of the function; recompute relevant things.
- MF.RenumberBlocks();
- // Now we've inserted dispatch blocks, some register uses can have incoming
- // paths without a def. For example, before this pass register %a was
- // defined in BB1 and used in BB2, and there was only one path from BB1 and
- // BB2. But if this pass inserts a dispatch block having multiple
- // predecessors between the two BBs, now there are paths to BB2 without
- // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
- // IMPLICIT_DEFs to all regs is one simple way to fix it.
- addImplicitDefs(MF);
- return true;
- }
- return false;
- }
|