//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===---------------------------------------------------------------------===// /// /// \file /// \brief This implements the Relooper algorithm. This implementation includes /// optimizations added since the original academic paper [1] was published. /// /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In /// Proceedings of the ACM international conference companion on Object /// oriented programming systems languages and applications companion /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 /// http://doi.acm.org/10.1145/2048147.2048224 /// //===-------------------------------------------------------------------===// #include "Relooper.h" #include "WebAssembly.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cstring> #include <cstdlib> #include <functional> #include <list> #include <stack> #include <string> #define DEBUG_TYPE "relooper" using namespace llvm; using namespace Relooper; static cl::opt<int> RelooperSplittingFactor( "relooper-splitting-factor", cl::desc( "How much to discount code size when deciding whether to split a node"), cl::init(5)); static cl::opt<unsigned> RelooperMultipleSwitchThreshold( "relooper-multiple-switch-threshold", cl::desc( "How many entries to allow in a multiple before we use a switch"), cl::init(10)); static cl::opt<unsigned> RelooperNestingLimit( "relooper-nesting-limit", cl::desc( "How much nesting is acceptable"), cl::init(20)); namespace { /// /// Implements the relooper algorithm for a function's blocks. /// /// Implementation details: The Relooper instance has /// ownership of the blocks and shapes, and frees them when done. /// struct RelooperAlgorithm { std::deque<Block *> Blocks; std::deque<Shape *> Shapes; Shape *Root; bool MinSize; int BlockIdCounter; int ShapeIdCounter; RelooperAlgorithm(); ~RelooperAlgorithm(); void AddBlock(Block *New, int Id = -1); // Calculates the shapes void Calculate(Block *Entry); // Sets us to try to minimize size void SetMinSize(bool MinSize_) { MinSize = MinSize_; } }; struct RelooperAnalysis final : public FunctionPass { static char ID; RelooperAnalysis() : FunctionPass(ID) {} const char *getPassName() const override { return "relooper"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } bool runOnFunction(Function &F) override; }; } // RelooperAnalysis char RelooperAnalysis::ID = 0; FunctionPass *llvm::createWebAssemblyRelooper() { return new RelooperAnalysis(); } bool RelooperAnalysis::runOnFunction(Function &F) { DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); RelooperAlgorithm R; // FIXME: remove duplication between relooper's and LLVM's BBs. std::map<const BasicBlock *, Block *> BB2B; std::map<const Block *, const BasicBlock *> B2BB; for (const BasicBlock &BB : F) { // FIXME: getName is wrong here, Code is meant to represent amount of code. // FIXME: use BranchVarInit for switch. Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); R.AddBlock(B); assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); BB2B[&BB] = B; B2BB[B] = &BB; } for (Block *B : R.Blocks) { const BasicBlock *BB = B2BB[B]; for (const BasicBlock *Successor : successors(BB)) // FIXME: add branch's Condition and Code below. B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); } R.Calculate(BB2B[&F.getEntryBlock()]); return false; // Analysis passes don't modify anything. } // Helpers typedef MapVector<Block *, BlockSet> BlockBlockSetMap; typedef std::list<Block *> BlockList; template <class T, class U> static bool contains(const T &container, const U &contained) { return container.count(contained); } // Branch Branch::Branch(const char *ConditionInit, const char *CodeInit) : Ancestor(nullptr), Labeled(true) { // FIXME: move from char* to LLVM data structures Condition = ConditionInit ? strdup(ConditionInit) : nullptr; Code = CodeInit ? strdup(CodeInit) : nullptr; } Branch::~Branch() { // FIXME: move from char* to LLVM data structures free(static_cast<void *>(const_cast<char *>(Condition))); free(static_cast<void *>(const_cast<char *>(Code))); } // Block Block::Block(const char *CodeInit, const char *BranchVarInit) : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { // FIXME: move from char* to LLVM data structures Code = strdup(CodeInit); BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr; } Block::~Block() { // FIXME: move from char* to LLVM data structures free(static_cast<void *>(const_cast<char *>(Code))); free(static_cast<void *>(const_cast<char *>(BranchVar))); } void Block::AddBranchTo(Block *Target, const char *Condition, const char *Code) { assert(!contains(BranchesOut, Target) && "cannot add more than one branch to the same target"); BranchesOut[Target] = make_unique<Branch>(Condition, Code); } // Relooper RelooperAlgorithm::RelooperAlgorithm() : Root(nullptr), MinSize(false), BlockIdCounter(1), ShapeIdCounter(0) { // block ID 0 is reserved for clearings } RelooperAlgorithm::~RelooperAlgorithm() { for (auto Curr : Blocks) delete Curr; for (auto Curr : Shapes) delete Curr; } void RelooperAlgorithm::AddBlock(Block *New, int Id) { New->Id = Id == -1 ? BlockIdCounter++ : Id; Blocks.push_back(New); } struct RelooperRecursor { RelooperAlgorithm *Parent; RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} }; void RelooperAlgorithm::Calculate(Block *Entry) { // Scan and optimize the input struct PreOptimizer : public RelooperRecursor { PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} BlockSet Live; void FindLive(Block *Root) { BlockList ToInvestigate; ToInvestigate.push_back(Root); while (!ToInvestigate.empty()) { Block *Curr = ToInvestigate.front(); ToInvestigate.pop_front(); if (contains(Live, Curr)) continue; Live.insert(Curr); for (const auto &iter : Curr->BranchesOut) ToInvestigate.push_back(iter.first); } } // If a block has multiple entries but no exits, and it is small enough, it // is useful to split it. A common example is a C++ function where // everything ends up at a final exit block and does some RAII cleanup. // Without splitting, we will be forced to introduce labelled loops to // allow reaching the final block void SplitDeadEnds() { unsigned TotalCodeSize = 0; for (const auto &Curr : Live) { TotalCodeSize += strlen(Curr->Code); } BlockSet Splits; BlockSet Removed; for (const auto &Original : Live) { if (Original->BranchesIn.size() <= 1 || !Original->BranchesOut.empty()) continue; // only dead ends, for now if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > TotalCodeSize / RelooperSplittingFactor) continue; // if splitting increases raw code size by a significant // amount, abort // Split the node (for simplicity, we replace all the blocks, even // though we could have reused the original) DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); for (const auto &Prior : Original->BranchesIn) { Block *Split = new Block(Original->Code, Original->BranchVar); Parent->AddBlock(Split, Original->Id); Split->BranchesIn.insert(Prior); std::unique_ptr<Branch> Details; Details.swap(Prior->BranchesOut[Original]); Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, Details->Code); for (const auto &iter : Original->BranchesOut) { Block *Post = iter.first; Branch *Details = iter.second.get(); Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, Details->Code); Post->BranchesIn.insert(Split); } Splits.insert(Split); Removed.insert(Original); } for (const auto &iter : Original->BranchesOut) { Block *Post = iter.first; Post->BranchesIn.remove(Original); } } for (const auto &iter : Splits) Live.insert(iter); for (const auto &iter : Removed) Live.remove(iter); } }; PreOptimizer Pre(this); Pre.FindLive(Entry); // Add incoming branches from live blocks, ignoring dead code for (unsigned i = 0; i < Blocks.size(); i++) { Block *Curr = Blocks[i]; if (!contains(Pre.Live, Curr)) continue; for (const auto &iter : Curr->BranchesOut) iter.first->BranchesIn.insert(Curr); } if (!MinSize) Pre.SplitDeadEnds(); // Recursively process the graph struct Analyzer : public RelooperRecursor { Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} // Add a shape to the list of shapes in this Relooper calculation void Notice(Shape *New) { New->Id = Parent->ShapeIdCounter++; Parent->Shapes.push_back(New); } // Create a list of entries from a block. If LimitTo is provided, only // results in that set will appear void GetBlocksOut(Block *Source, BlockSet &Entries, BlockSet *LimitTo = nullptr) { for (const auto &iter : Source->BranchesOut) if (!LimitTo || contains(*LimitTo, iter.first)) Entries.insert(iter.first); } // Converts/processes all branchings to a specific target void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, BlockSet &From) { DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type << "\n"); for (auto iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) { Block *Prior = *iter; if (!contains(From, Prior)) { iter++; continue; } std::unique_ptr<Branch> PriorOut; PriorOut.swap(Prior->BranchesOut[Target]); PriorOut->Ancestor = Ancestor; PriorOut->Type = Type; if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) Multiple->Breaks++; // We are breaking out of this Multiple, so need a // loop iter++; // carefully increment iter before erasing Target->BranchesIn.remove(Prior); Target->ProcessedBranchesIn.insert(Prior); Prior->ProcessedBranchesOut[Target].swap(PriorOut); } } Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); SimpleShape *Simple = new SimpleShape; Notice(Simple); Simple->Inner = Inner; Inner->Parent = Simple; if (Blocks.size() > 1) { Blocks.remove(Inner); GetBlocksOut(Inner, NextEntries, &Blocks); BlockSet JustInner; JustInner.insert(Inner); for (const auto &iter : NextEntries) Solipsize(iter, Branch::Direct, Simple, JustInner); } return Simple; } Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, BlockSet &NextEntries) { // Find the inner blocks in this loop. Proceed backwards from the entries // until // you reach a seen block, collecting as you go. BlockSet InnerBlocks; BlockSet Queue = Entries; while (!Queue.empty()) { Block *Curr = *(Queue.begin()); Queue.remove(*Queue.begin()); if (!contains(InnerBlocks, Curr)) { // This element is new, mark it as inner and remove from outer InnerBlocks.insert(Curr); Blocks.remove(Curr); // Add the elements prior to it for (const auto &iter : Curr->BranchesIn) Queue.insert(iter); } } assert(!InnerBlocks.empty()); for (const auto &Curr : InnerBlocks) { for (const auto &iter : Curr->BranchesOut) { Block *Possible = iter.first; if (!contains(InnerBlocks, Possible)) NextEntries.insert(Possible); } } LoopShape *Loop = new LoopShape(); Notice(Loop); // Solipsize the loop, replacing with break/continue and marking branches // as Processed (will not affect later calculations) // A. Branches to the loop entries become a continue to this shape for (const auto &iter : Entries) Solipsize(iter, Branch::Continue, Loop, InnerBlocks); // B. Branches to outside the loop (a next entry) become breaks on this // shape for (const auto &iter : NextEntries) Solipsize(iter, Branch::Break, Loop, InnerBlocks); // Finish up Shape *Inner = Process(InnerBlocks, Entries, nullptr); Loop->Inner = Inner; return Loop; } // For each entry, find the independent group reachable by it. The // independent group is the entry itself, plus all the blocks it can // reach that cannot be directly reached by another entry. Note that we // ignore directly reaching the entry itself by another entry. // @param Ignore - previous blocks that are irrelevant void FindIndependentGroups(BlockSet &Entries, BlockBlockSetMap &IndependentGroups, BlockSet *Ignore = nullptr) { typedef std::map<Block *, Block *> BlockBlockMap; struct HelperClass { BlockBlockSetMap &IndependentGroups; BlockBlockMap Ownership; // For each block, which entry it belongs to. // We have reached it from there. HelperClass(BlockBlockSetMap &IndependentGroupsInit) : IndependentGroups(IndependentGroupsInit) {} void InvalidateWithChildren(Block *New) { // Being in the list means you need to be invalidated BlockList ToInvalidate; ToInvalidate.push_back(New); while (!ToInvalidate.empty()) { Block *Invalidatee = ToInvalidate.front(); ToInvalidate.pop_front(); Block *Owner = Ownership[Invalidatee]; // Owner may have been invalidated, do not add to // IndependentGroups! if (contains(IndependentGroups, Owner)) IndependentGroups[Owner].remove(Invalidatee); if (Ownership[Invalidatee]) { // may have been seen before and // invalidated already Ownership[Invalidatee] = nullptr; for (const auto &iter : Invalidatee->BranchesOut) { Block *Target = iter.first; BlockBlockMap::iterator Known = Ownership.find(Target); if (Known != Ownership.end()) { Block *TargetOwner = Known->second; if (TargetOwner) ToInvalidate.push_back(Target); } } } } } }; HelperClass Helper(IndependentGroups); // We flow out from each of the entries, simultaneously. // When we reach a new block, we add it as belonging to the one we got to // it from. // If we reach a new block that is already marked as belonging to someone, // it is reachable by two entries and is not valid for any of them. // Remove it and all it can reach that have been visited. // Being in the queue means we just added this item, and // we need to add its children BlockList Queue; for (const auto &Entry : Entries) { Helper.Ownership[Entry] = Entry; IndependentGroups[Entry].insert(Entry); Queue.push_back(Entry); } while (!Queue.empty()) { Block *Curr = Queue.front(); Queue.pop_front(); Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership // map if we are in the queue if (!Owner) continue; // we have been invalidated meanwhile after being reached // from two entries // Add all children for (const auto &iter : Curr->BranchesOut) { Block *New = iter.first; BlockBlockMap::iterator Known = Helper.Ownership.find(New); if (Known == Helper.Ownership.end()) { // New node. Add it, and put it in the queue Helper.Ownership[New] = Owner; IndependentGroups[Owner].insert(New); Queue.push_back(New); continue; } Block *NewOwner = Known->second; if (!NewOwner) continue; // We reached an invalidated node if (NewOwner != Owner) // Invalidate this and all reachable that we have seen - we reached // this from two locations Helper.InvalidateWithChildren(New); // otherwise, we have the same owner, so do nothing } } // Having processed all the interesting blocks, we remain with just one // potential issue: // If a->b, and a was invalidated, but then b was later reached by // someone else, we must invalidate b. To check for this, we go over all // elements in the independent groups, if an element has a parent which // does *not* have the same owner, we/ must remove it and all its // children. for (const auto &iter : Entries) { BlockSet &CurrGroup = IndependentGroups[iter]; BlockList ToInvalidate; for (const auto &iter : CurrGroup) { Block *Child = iter; for (const auto &iter : Child->BranchesIn) { Block *Parent = iter; if (Ignore && contains(*Ignore, Parent)) continue; if (Helper.Ownership[Parent] != Helper.Ownership[Child]) ToInvalidate.push_back(Child); } } while (!ToInvalidate.empty()) { Block *Invalidatee = ToInvalidate.front(); ToInvalidate.pop_front(); Helper.InvalidateWithChildren(Invalidatee); } } // Remove empty groups for (const auto &iter : Entries) if (IndependentGroups[iter].empty()) IndependentGroups.erase(iter); } Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, BlockBlockSetMap &IndependentGroups, Shape *Prev, BlockSet &NextEntries) { bool Fused = isa<SimpleShape>(Prev); MultipleShape *Multiple = new MultipleShape(); Notice(Multiple); BlockSet CurrEntries; for (auto &iter : IndependentGroups) { Block *CurrEntry = iter.first; BlockSet &CurrBlocks = iter.second; // Create inner block CurrEntries.clear(); CurrEntries.insert(CurrEntry); for (const auto &CurrInner : CurrBlocks) { // Remove the block from the remaining blocks Blocks.remove(CurrInner); // Find new next entries and fix branches to them for (auto iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) { Block *CurrTarget = iter->first; auto Next = iter; Next++; if (!contains(CurrBlocks, CurrTarget)) { NextEntries.insert(CurrTarget); Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); } iter = Next; // increment carefully because Solipsize can remove us } } Multiple->InnerMap[CurrEntry->Id] = Process(CurrBlocks, CurrEntries, nullptr); // If we are not fused, then our entries will actually be checked if (!Fused) CurrEntry->IsCheckedMultipleEntry = true; } // Add entries not handled as next entries, they are deferred for (const auto &Entry : Entries) if (!contains(IndependentGroups, Entry)) NextEntries.insert(Entry); // The multiple has been created, we can decide how to implement it if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { Multiple->UseSwitch = true; Multiple->Breaks++; // switch captures breaks } return Multiple; } // Main function. // Process a set of blocks with specified entries, returns a shape // The Make* functions receive a NextEntries. If they fill it with data, // those are the entries for the ->Next block on them, and the blocks // are what remains in Blocks (which Make* modify). In this way // we avoid recursing on Next (imagine a long chain of Simples, if we // recursed we could blow the stack). Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { BlockSet *Entries = &InitialEntries; BlockSet TempEntries[2]; int CurrTempIndex = 0; BlockSet *NextEntries; Shape *Ret = nullptr; auto Make = [&](Shape *Temp) { if (Prev) Prev->Next = Temp; if (!Ret) Ret = Temp; Prev = Temp; Entries = NextEntries; }; while (1) { CurrTempIndex = 1 - CurrTempIndex; NextEntries = &TempEntries[CurrTempIndex]; NextEntries->clear(); if (Entries->empty()) return Ret; if (Entries->size() == 1) { Block *Curr = *(Entries->begin()); if (Curr->BranchesIn.empty()) { // One entry, no looping ==> Simple Make(MakeSimple(Blocks, Curr, *NextEntries)); if (NextEntries->empty()) return Ret; continue; } // One entry, looping ==> Loop Make(MakeLoop(Blocks, *Entries, *NextEntries)); if (NextEntries->empty()) return Ret; continue; } // More than one entry, try to eliminate through a Multiple groups of // independent blocks from an entry/ies. It is important to remove // through multiples as opposed to looping since the former is more // performant. BlockBlockSetMap IndependentGroups; FindIndependentGroups(*Entries, IndependentGroups); if (!IndependentGroups.empty()) { // We can handle a group in a multiple if its entry cannot be reached // by another group. // Note that it might be reachable by itself - a loop. But that is // fine, we will create a loop inside the multiple block (which // is the performant order to do it). for (auto iter = IndependentGroups.begin(); iter != IndependentGroups.end();) { Block *Entry = iter->first; BlockSet &Group = iter->second; auto curr = iter++; // iterate carefully, we may delete for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) { Block *Origin = *iterBranch; if (!contains(Group, Origin)) { // Reached from outside the group, so we cannot handle this IndependentGroups.erase(curr); break; } } } // As an optimization, if we have 2 independent groups, and one is a // small dead end, we can handle only that dead end. // The other then becomes a Next - without nesting in the code and // recursion in the analysis. // TODO: if the larger is the only dead end, handle that too // TODO: handle >2 groups // TODO: handle not just dead ends, but also that do not branch to the // NextEntries. However, must be careful there since we create a // Next, and that Next can prevent eliminating a break (since we no // longer naturally reach the same place), which may necessitate a // one-time loop, which makes the unnesting pointless. if (IndependentGroups.size() == 2) { // Find the smaller one auto iter = IndependentGroups.begin(); Block *SmallEntry = iter->first; auto SmallSize = iter->second.size(); iter++; Block *LargeEntry = iter->first; auto LargeSize = iter->second.size(); if (SmallSize != LargeSize) { // ignore the case where they are // identical - keep things symmetrical // there if (SmallSize > LargeSize) { Block *Temp = SmallEntry; SmallEntry = LargeEntry; LargeEntry = Temp; // Note: we did not flip the Sizes too, they // are now invalid. TODO: use the smaller // size as a limit? } // Check if dead end bool DeadEnd = true; BlockSet &SmallGroup = IndependentGroups[SmallEntry]; for (const auto &Curr : SmallGroup) { for (const auto &iter : Curr->BranchesOut) { Block *Target = iter.first; if (!contains(SmallGroup, Target)) { DeadEnd = false; break; } } if (!DeadEnd) break; } if (DeadEnd) IndependentGroups.erase(LargeEntry); } } if (!IndependentGroups.empty()) // Some groups removable ==> Multiple Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, *NextEntries)); if (NextEntries->empty()) return Ret; continue; } // No independent groups, must be loopable ==> Loop Make(MakeLoop(Blocks, *Entries, *NextEntries)); if (NextEntries->empty()) return Ret; continue; } } }; // Main BlockSet AllBlocks; for (const auto &Curr : Pre.Live) { AllBlocks.insert(Curr); } BlockSet Entries; Entries.insert(Entry); Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); assert(Root); /// /// Relooper post-optimizer /// struct PostOptimizer { RelooperAlgorithm *Parent; std::stack<Shape *> LoopStack; PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} void ShapeSwitch(Shape* var, std::function<void (SimpleShape*)> simple, std::function<void (MultipleShape*)> multiple, std::function<void (LoopShape*)> loop) { switch (var->getKind()) { case Shape::SK_Simple: { simple(cast<SimpleShape>(var)); break; } case Shape::SK_Multiple: { multiple(cast<MultipleShape>(var)); break; } case Shape::SK_Loop: { loop(cast<LoopShape>(var)); break; } } } // Find the blocks that natural control flow can get us directly to, or // through a multiple that we ignore void FollowNaturalFlow(Shape *S, BlockSet &Out) { ShapeSwitch(S, [&](SimpleShape* Simple) { Out.insert(Simple->Inner); }, [&](MultipleShape* Multiple) { for (const auto &iter : Multiple->InnerMap) { FollowNaturalFlow(iter.second, Out); } FollowNaturalFlow(Multiple->Next, Out); }, [&](LoopShape* Loop) { FollowNaturalFlow(Loop->Inner, Out); }); } void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { if (Root->Next) { Root->Natural = Root->Next; FindNaturals(Root->Next, Otherwise); } else { Root->Natural = Otherwise; } ShapeSwitch(Root, [](SimpleShape* Simple) { }, [&](MultipleShape* Multiple) { for (const auto &iter : Multiple->InnerMap) { FindNaturals(iter.second, Root->Natural); } }, [&](LoopShape* Loop){ FindNaturals(Loop->Inner, Loop->Inner); }); } // Remove unneeded breaks and continues. // A flow operation is trivially unneeded if the shape we naturally get to // by normal code execution is the same as the flow forces us to. void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, LoopShape *LastLoop = nullptr, unsigned Depth = 0) { BlockSet NaturalBlocks; FollowNaturalFlow(Natural, NaturalBlocks); Shape *Next = Root; while (Next) { Root = Next; Next = nullptr; ShapeSwitch( Root, [&](SimpleShape* Simple) { if (Simple->Inner->BranchVar) LastLoop = nullptr; // a switch clears out the loop (TODO: only for // breaks, not continue) if (Simple->Next) { if (!Simple->Inner->BranchVar && Simple->Inner->ProcessedBranchesOut.size() == 2 && Depth < RelooperNestingLimit) { // If there is a next block, we already know at Simple // creation time to make direct branches, and we can do // nothing more in general. But, we try to optimize the // case of a break and a direct: This would normally be // if (break?) { break; } .. // but if we make sure to nest the else, we can save the // break, // if (!break?) { .. } // This is also better because the more canonical nested // form is easier to further optimize later. The // downside is more nesting, which adds to size in builds with // whitespace. // Note that we avoid switches, as it complicates control flow // and is not relevant for the common case we optimize here. bool Found = false; bool Abort = false; for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { Block *Target = iter.first; Branch *Details = iter.second.get(); if (Details->Type == Branch::Break) { Found = true; if (!contains(NaturalBlocks, Target)) Abort = true; } else if (Details->Type != Branch::Direct) Abort = true; } if (Found && !Abort) { for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { Branch *Details = iter.second.get(); if (Details->Type == Branch::Break) { Details->Type = Branch::Direct; if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Details->Ancestor)) Multiple->Breaks--; } else { assert(Details->Type == Branch::Direct); Details->Type = Branch::Nested; } } } Depth++; // this optimization increases depth, for us and all // our next chain (i.e., until this call returns) } Next = Simple->Next; } else { // If there is no next then Natural is where we will // go to by doing nothing, so we can potentially optimize some // branches to direct. for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { Block *Target = iter.first; Branch *Details = iter.second.get(); if (Details->Type != Branch::Direct && contains(NaturalBlocks, Target)) { // note: cannot handle split blocks Details->Type = Branch::Direct; if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Details->Ancestor)) Multiple->Breaks--; } else if (Details->Type == Branch::Break && LastLoop && LastLoop->Natural == Details->Ancestor->Natural) { // it is important to simplify breaks, as simpler breaks // enable other optimizations Details->Labeled = false; if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Details->Ancestor)) Multiple->Breaks--; } } } }, [&](MultipleShape* Multiple) { for (const auto &iter : Multiple->InnerMap) { RemoveUnneededFlows(iter.second, Multiple->Next, Multiple->Breaks ? nullptr : LastLoop, Depth + 1); } Next = Multiple->Next; }, [&](LoopShape* Loop) { RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); Next = Loop->Next; }); } } // After we know which loops exist, we can calculate which need to be // labeled void FindLabeledLoops(Shape *Root) { Shape *Next = Root; while (Next) { Root = Next; Next = nullptr; ShapeSwitch( Root, [&](SimpleShape *Simple) { MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); // If we are fusing a Multiple with a loop into this Simple, then // visit it now if (Fused && Fused->Breaks) LoopStack.push(Fused); if (Simple->Inner->BranchVar) LoopStack.push(nullptr); // a switch means breaks are now useless, // push a dummy if (Fused) { if (Fused->UseSwitch) LoopStack.push(nullptr); // a switch means breaks are now // useless, push a dummy for (const auto &iter : Fused->InnerMap) { FindLabeledLoops(iter.second); } } for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { Branch *Details = iter.second.get(); if (Details->Type == Branch::Break || Details->Type == Branch::Continue) { assert(!LoopStack.empty()); if (Details->Ancestor != LoopStack.top() && Details->Labeled) { if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Details->Ancestor)) { Multiple->Labeled = true; } else { LoopShape *Loop = cast<LoopShape>(Details->Ancestor); Loop->Labeled = true; } } else { Details->Labeled = false; } } if (Fused && Fused->UseSwitch) LoopStack.pop(); if (Simple->Inner->BranchVar) LoopStack.pop(); if (Fused && Fused->Breaks) LoopStack.pop(); if (Fused) Next = Fused->Next; else Next = Root->Next; } } , [&](MultipleShape* Multiple) { if (Multiple->Breaks) LoopStack.push(Multiple); for (const auto &iter : Multiple->InnerMap) FindLabeledLoops(iter.second); if (Multiple->Breaks) LoopStack.pop(); Next = Root->Next; } , [&](LoopShape* Loop) { LoopStack.push(Loop); FindLabeledLoops(Loop->Inner); LoopStack.pop(); Next = Root->Next; }); } } void Process(Shape * Root) { FindNaturals(Root); RemoveUnneededFlows(Root); FindLabeledLoops(Root); } }; PostOptimizer(this).Process(Root); }