1 //===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===---------------------------------------------------------------------===//
11 /// \brief This implements the Relooper algorithm. This implementation includes
12 /// optimizations added since the original academic paper [1] was published.
14 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
15 /// Proceedings of the ACM international conference companion on Object
16 /// oriented programming systems languages and applications companion
17 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
18 /// http://doi.acm.org/10.1145/2048147.2048224
20 //===-------------------------------------------------------------------===//
23 #include "WebAssembly.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
40 #define DEBUG_TYPE "relooper"
43 using namespace Relooper;
// Command-line knobs tuning the relooper's output; each is consulted by the
// algorithm below.

// Split-node discount: a multiple-entry, no-exit block is duplicated per
// predecessor only while its code size times the extra copies stays under
// TotalCodeSize / RelooperSplittingFactor (see SplitDeadEnds). Larger values
// mean less splitting.
static cl::opt<int> RelooperSplittingFactor(
    "relooper-splitting-factor",
    "How much to discount code size when deciding whether to split a node"),

// Once a multiple shape has at least this many inner entries it is emitted
// as a switch rather than an if-chain (see MakeMultiple).
static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
    "relooper-multiple-switch-threshold",
    "How many entries to allow in a multiple before we use a switch"),

// Depth limit past which RemoveUnneededFlows stops applying its
// nesting-increasing break-to-direct optimization.
static cl::opt<unsigned> RelooperNestingLimit(
    "relooper-nesting-limit",
    "How much nesting is acceptable"),
/// Implements the relooper algorithm for a function's blocks.
/// Implementation details: The Relooper instance has
/// ownership of the blocks and shapes, and frees them when done.
struct RelooperAlgorithm {
  // Every Block/Shape handed to (or created by) the algorithm; owned here
  // and released in the destructor.
  std::deque<Block *> Blocks;
  std::deque<Shape *> Shapes;

  // Takes ownership of New; Id == -1 means "assign the next free block id",
  // otherwise the given id is reused (used when cloning split blocks).
  void AddBlock(Block *New, int Id = -1);

  // Calculates the shapes
  void Calculate(Block *Entry);

  // Sets us to try to minimize size
  // (MinSize is a member flag declared elsewhere in this struct.)
  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
// Legacy FunctionPass wrapper that runs the relooper over each function;
// created via createWebAssemblyRelooper() below.
struct RelooperAnalysis final : public FunctionPass {
  // NOTE(review): the static pass `ID` member referenced here is declared
  // outside this excerpt; its definition (RelooperAnalysis::ID) follows.
  RelooperAnalysis() : FunctionPass(ID) {}
  const char *getPassName() const override { return "relooper"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
  bool runOnFunction(Function &F) override;
char RelooperAnalysis::ID = 0;
// Factory used by the WebAssembly target to instantiate the pass.
FunctionPass *llvm::createWebAssemblyRelooper() {
  return new RelooperAnalysis();
// Mirrors the function's CFG into relooper Blocks, wires up the edges, and
// runs the relooper. Returns false: this is an analysis, nothing is changed.
bool RelooperAnalysis::runOnFunction(Function &F) {
  DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
  // Bidirectional BasicBlock <-> Block mapping, so edges can be translated.
  // FIXME: remove duplication between relooper's and LLVM's BBs.
  std::map<const BasicBlock *, Block *> BB2B;
  std::map<const Block *, const BasicBlock *> B2BB;
  for (const BasicBlock &BB : F) {
    // FIXME: getName is wrong here, Code is meant to represent amount of code.
    // FIXME: use BranchVarInit for switch.
    Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
    assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
    assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
  // All Blocks exist now; translate each CFG successor edge into a branch.
  for (Block *B : R.Blocks) {
    const BasicBlock *BB = B2BB[B];
    for (const BasicBlock *Successor : successors(BB))
      // FIXME: add branch's Condition and Code below.
      B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
  // Run the algorithm rooted at the function's entry block.
  R.Calculate(BB2B[&F.getEntryBlock()]);
  return false; // Analysis passes don't modify anything.
137 typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
138 typedef std::list<Block *> BlockList;
// Generic membership test for associative containers (set/map-like):
// true iff `Needle` is present in `Haystack`.
template <class ContainerT, class KeyT>
static bool contains(const ContainerT &Haystack, const KeyT &Needle) {
  return Haystack.count(Needle) != 0;
}
// Branch: an out-edge of a Block. Duplicates (strdup) the condition and code
// C-strings so the Branch owns its own copies; either argument may be null.
Branch::Branch(const char *ConditionInit, const char *CodeInit)
    : Ancestor(nullptr), Labeled(true) {
  // FIXME: move from char* to LLVM data structures
  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
  Code = CodeInit ? strdup(CodeInit) : nullptr;
  // Destructor body: release the strings strdup'd by the constructor.
  // free(nullptr) is a no-op, so null fields are safe.
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Condition)));
  free(static_cast<void *>(const_cast<char *>(Code)));
// Block: one relooper basic block. Code is strdup'd unconditionally, so
// CodeInit must be non-null; BranchVarInit (switch variable name) may be null.
Block::Block(const char *CodeInit, const char *BranchVarInit)
    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
  // FIXME: move from char* to LLVM data structures
  Code = strdup(CodeInit);
  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
  // Destructor body: free the strings duplicated in the constructor
  // (BranchVar may be null; free(nullptr) is harmless).
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Code)));
  free(static_cast<void *>(const_cast<char *>(BranchVar)));
// Records an out-edge from this block to Target. At most one branch per
// target is allowed; Branch's constructor copies the Condition/Code strings.
void Block::AddBranchTo(Block *Target, const char *Condition,
  assert(!contains(BranchesOut, Target) &&
         "cannot add more than one branch to the same target");
  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
// Counters start so that shape ids begin at 0 and block ids at 1.
RelooperAlgorithm::RelooperAlgorithm()
    : Root(nullptr), MinSize(false), BlockIdCounter(1),
      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
// The algorithm owns every Block and Shape registered with it (see
// AddBlock/Notice); release them all here.
RelooperAlgorithm::~RelooperAlgorithm() {
  for (auto Curr : Blocks)
  for (auto Curr : Shapes)
// Takes ownership of New. Id == -1 assigns the next free id; otherwise the
// caller-provided id is reused (SplitDeadEnds passes the original block's id
// to its clones).
void RelooperAlgorithm::AddBlock(Block *New, int Id) {
  New->Id = Id == -1 ? BlockIdCounter++ : Id;
  Blocks.push_back(New);
// Common base for the helper structs defined inside Calculate(): carries a
// non-owning pointer back to the algorithm instance being operated on.
struct RelooperRecursor {
  RelooperAlgorithm *Parent;
  RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
207 void RelooperAlgorithm::Calculate(Block *Entry) {
208 // Scan and optimize the input
209 struct PreOptimizer : public RelooperRecursor {
210 PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
213 void FindLive(Block *Root) {
214 BlockList ToInvestigate;
215 ToInvestigate.push_back(Root);
216 while (!ToInvestigate.empty()) {
217 Block *Curr = ToInvestigate.front();
218 ToInvestigate.pop_front();
219 if (contains(Live, Curr))
222 for (const auto &iter : Curr->BranchesOut)
223 ToInvestigate.push_back(iter.first);
227 // If a block has multiple entries but no exits, and it is small enough, it
228 // is useful to split it. A common example is a C++ function where
229 // everything ends up at a final exit block and does some RAII cleanup.
230 // Without splitting, we will be forced to introduce labelled loops to
231 // allow reaching the final block
232 void SplitDeadEnds() {
233 unsigned TotalCodeSize = 0;
234 for (const auto &Curr : Live) {
235 TotalCodeSize += strlen(Curr->Code);
239 for (const auto &Original : Live) {
240 if (Original->BranchesIn.size() <= 1 ||
241 !Original->BranchesOut.empty())
242 continue; // only dead ends, for now
243 if (contains(Original->BranchesOut, Original))
244 continue; // cannot split a looping node
245 if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
246 TotalCodeSize / RelooperSplittingFactor)
247 continue; // if splitting increases raw code size by a significant
249 // Split the node (for simplicity, we replace all the blocks, even
250 // though we could have reused the original)
251 DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
252 for (const auto &Prior : Original->BranchesIn) {
253 Block *Split = new Block(Original->Code, Original->BranchVar);
254 Parent->AddBlock(Split, Original->Id);
255 Split->BranchesIn.insert(Prior);
256 std::unique_ptr<Branch> Details;
257 Details.swap(Prior->BranchesOut[Original]);
258 Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
260 for (const auto &iter : Original->BranchesOut) {
261 Block *Post = iter.first;
262 Branch *Details = iter.second.get();
263 Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
265 Post->BranchesIn.insert(Split);
267 Splits.insert(Split);
268 Removed.insert(Original);
270 for (const auto &iter : Original->BranchesOut) {
271 Block *Post = iter.first;
272 Post->BranchesIn.remove(Original);
275 for (const auto &iter : Splits)
277 for (const auto &iter : Removed)
281 PreOptimizer Pre(this);
284 // Add incoming branches from live blocks, ignoring dead code
285 for (unsigned i = 0; i < Blocks.size(); i++) {
286 Block *Curr = Blocks[i];
287 if (!contains(Pre.Live, Curr))
289 for (const auto &iter : Curr->BranchesOut)
290 iter.first->BranchesIn.insert(Curr);
296 // Recursively process the graph
298 struct Analyzer : public RelooperRecursor {
299 Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
301 // Add a shape to the list of shapes in this Relooper calculation
302 void Notice(Shape *New) {
303 New->Id = Parent->ShapeIdCounter++;
304 Parent->Shapes.push_back(New);
307 // Create a list of entries from a block. If LimitTo is provided, only
308 // results in that set will appear
309 void GetBlocksOut(Block *Source, BlockSet &Entries,
310 BlockSet *LimitTo = nullptr) {
311 for (const auto &iter : Source->BranchesOut)
312 if (!LimitTo || contains(*LimitTo, iter.first))
313 Entries.insert(iter.first);
316 // Converts/processes all branchings to a specific target
317 void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
319 DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
321 for (auto iter = Target->BranchesIn.begin();
322 iter != Target->BranchesIn.end();) {
323 Block *Prior = *iter;
324 if (!contains(From, Prior)) {
328 std::unique_ptr<Branch> PriorOut;
329 PriorOut.swap(Prior->BranchesOut[Target]);
330 PriorOut->Ancestor = Ancestor;
331 PriorOut->Type = Type;
332 if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
333 Multiple->Breaks++; // We are breaking out of this Multiple, so need a
335 iter++; // carefully increment iter before erasing
336 Target->BranchesIn.remove(Prior);
337 Target->ProcessedBranchesIn.insert(Prior);
338 Prior->ProcessedBranchesOut[Target].swap(PriorOut);
342 Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
343 DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
344 SimpleShape *Simple = new SimpleShape;
346 Simple->Inner = Inner;
347 Inner->Parent = Simple;
348 if (Blocks.size() > 1) {
349 Blocks.remove(Inner);
350 GetBlocksOut(Inner, NextEntries, &Blocks);
352 JustInner.insert(Inner);
353 for (const auto &iter : NextEntries)
354 Solipsize(iter, Branch::Direct, Simple, JustInner);
359 Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
360 BlockSet &NextEntries) {
361 // Find the inner blocks in this loop. Proceed backwards from the entries
363 // you reach a seen block, collecting as you go.
364 BlockSet InnerBlocks;
365 BlockSet Queue = Entries;
366 while (!Queue.empty()) {
367 Block *Curr = *(Queue.begin());
368 Queue.remove(*Queue.begin());
369 if (!contains(InnerBlocks, Curr)) {
370 // This element is new, mark it as inner and remove from outer
371 InnerBlocks.insert(Curr);
373 // Add the elements prior to it
374 for (const auto &iter : Curr->BranchesIn)
378 assert(!InnerBlocks.empty());
380 for (const auto &Curr : InnerBlocks) {
381 for (const auto &iter : Curr->BranchesOut) {
382 Block *Possible = iter.first;
383 if (!contains(InnerBlocks, Possible))
384 NextEntries.insert(Possible);
388 LoopShape *Loop = new LoopShape();
391 // Solipsize the loop, replacing with break/continue and marking branches
392 // as Processed (will not affect later calculations)
393 // A. Branches to the loop entries become a continue to this shape
394 for (const auto &iter : Entries)
395 Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
396 // B. Branches to outside the loop (a next entry) become breaks on this
398 for (const auto &iter : NextEntries)
399 Solipsize(iter, Branch::Break, Loop, InnerBlocks);
401 Shape *Inner = Process(InnerBlocks, Entries, nullptr);
406 // For each entry, find the independent group reachable by it. The
407 // independent group is the entry itself, plus all the blocks it can
408 // reach that cannot be directly reached by another entry. Note that we
409 // ignore directly reaching the entry itself by another entry.
410 // @param Ignore - previous blocks that are irrelevant
411 void FindIndependentGroups(BlockSet &Entries,
412 BlockBlockSetMap &IndependentGroups,
413 BlockSet *Ignore = nullptr) {
414 typedef std::map<Block *, Block *> BlockBlockMap;
417 BlockBlockSetMap &IndependentGroups;
418 BlockBlockMap Ownership; // For each block, which entry it belongs to.
419 // We have reached it from there.
421 HelperClass(BlockBlockSetMap &IndependentGroupsInit)
422 : IndependentGroups(IndependentGroupsInit) {}
423 void InvalidateWithChildren(Block *New) {
424 // Being in the list means you need to be invalidated
425 BlockList ToInvalidate;
426 ToInvalidate.push_back(New);
427 while (!ToInvalidate.empty()) {
428 Block *Invalidatee = ToInvalidate.front();
429 ToInvalidate.pop_front();
430 Block *Owner = Ownership[Invalidatee];
431 // Owner may have been invalidated, do not add to
432 // IndependentGroups!
433 if (contains(IndependentGroups, Owner))
434 IndependentGroups[Owner].remove(Invalidatee);
435 if (Ownership[Invalidatee]) { // may have been seen before and
436 // invalidated already
437 Ownership[Invalidatee] = nullptr;
438 for (const auto &iter : Invalidatee->BranchesOut) {
439 Block *Target = iter.first;
440 BlockBlockMap::iterator Known = Ownership.find(Target);
441 if (Known != Ownership.end()) {
442 Block *TargetOwner = Known->second;
444 ToInvalidate.push_back(Target);
451 HelperClass Helper(IndependentGroups);
453 // We flow out from each of the entries, simultaneously.
454 // When we reach a new block, we add it as belonging to the one we got to
456 // If we reach a new block that is already marked as belonging to someone,
457 // it is reachable by two entries and is not valid for any of them.
458 // Remove it and all it can reach that have been visited.
460 // Being in the queue means we just added this item, and
461 // we need to add its children
463 for (const auto &Entry : Entries) {
464 Helper.Ownership[Entry] = Entry;
465 IndependentGroups[Entry].insert(Entry);
466 Queue.push_back(Entry);
468 while (!Queue.empty()) {
469 Block *Curr = Queue.front();
471 Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
472 // map if we are in the queue
474 continue; // we have been invalidated meanwhile after being reached
477 for (const auto &iter : Curr->BranchesOut) {
478 Block *New = iter.first;
479 BlockBlockMap::iterator Known = Helper.Ownership.find(New);
480 if (Known == Helper.Ownership.end()) {
481 // New node. Add it, and put it in the queue
482 Helper.Ownership[New] = Owner;
483 IndependentGroups[Owner].insert(New);
484 Queue.push_back(New);
487 Block *NewOwner = Known->second;
489 continue; // We reached an invalidated node
490 if (NewOwner != Owner)
491 // Invalidate this and all reachable that we have seen - we reached
492 // this from two locations
493 Helper.InvalidateWithChildren(New);
494 // otherwise, we have the same owner, so do nothing
498 // Having processed all the interesting blocks, we remain with just one
500 // If a->b, and a was invalidated, but then b was later reached by
501 // someone else, we must invalidate b. To check for this, we go over all
502 // elements in the independent groups, if an element has a parent which
503 // does *not* have the same owner, we/ must remove it and all its
506 for (const auto &iter : Entries) {
507 BlockSet &CurrGroup = IndependentGroups[iter];
508 BlockList ToInvalidate;
509 for (const auto &iter : CurrGroup) {
511 for (const auto &iter : Child->BranchesIn) {
512 Block *Parent = iter;
513 if (Ignore && contains(*Ignore, Parent))
515 if (Helper.Ownership[Parent] != Helper.Ownership[Child])
516 ToInvalidate.push_back(Child);
519 while (!ToInvalidate.empty()) {
520 Block *Invalidatee = ToInvalidate.front();
521 ToInvalidate.pop_front();
522 Helper.InvalidateWithChildren(Invalidatee);
526 // Remove empty groups
527 for (const auto &iter : Entries)
528 if (IndependentGroups[iter].empty())
529 IndependentGroups.erase(iter);
532 Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
533 BlockBlockSetMap &IndependentGroups, Shape *Prev,
534 BlockSet &NextEntries) {
535 bool Fused = isa<SimpleShape>(Prev);
536 MultipleShape *Multiple = new MultipleShape();
538 BlockSet CurrEntries;
539 for (auto &iter : IndependentGroups) {
540 Block *CurrEntry = iter.first;
541 BlockSet &CurrBlocks = iter.second;
542 // Create inner block
544 CurrEntries.insert(CurrEntry);
545 for (const auto &CurrInner : CurrBlocks) {
546 // Remove the block from the remaining blocks
547 Blocks.remove(CurrInner);
548 // Find new next entries and fix branches to them
549 for (auto iter = CurrInner->BranchesOut.begin();
550 iter != CurrInner->BranchesOut.end();) {
551 Block *CurrTarget = iter->first;
554 if (!contains(CurrBlocks, CurrTarget)) {
555 NextEntries.insert(CurrTarget);
556 Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
558 iter = Next; // increment carefully because Solipsize can remove us
561 Multiple->InnerMap[CurrEntry->Id] =
562 Process(CurrBlocks, CurrEntries, nullptr);
563 // If we are not fused, then our entries will actually be checked
565 CurrEntry->IsCheckedMultipleEntry = true;
567 // Add entries not handled as next entries, they are deferred
568 for (const auto &Entry : Entries)
569 if (!contains(IndependentGroups, Entry))
570 NextEntries.insert(Entry);
571 // The multiple has been created, we can decide how to implement it
572 if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
573 Multiple->UseSwitch = true;
574 Multiple->Breaks++; // switch captures breaks
580 // Process a set of blocks with specified entries, returns a shape
581 // The Make* functions receive a NextEntries. If they fill it with data,
582 // those are the entries for the ->Next block on them, and the blocks
583 // are what remains in Blocks (which Make* modify). In this way
584 // we avoid recursing on Next (imagine a long chain of Simples, if we
585 // recursed we could blow the stack).
586 Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
587 BlockSet *Entries = &InitialEntries;
588 BlockSet TempEntries[2];
589 int CurrTempIndex = 0;
590 BlockSet *NextEntries;
591 Shape *Ret = nullptr;
593 auto Make = [&](Shape *Temp) {
599 Entries = NextEntries;
603 CurrTempIndex = 1 - CurrTempIndex;
604 NextEntries = &TempEntries[CurrTempIndex];
605 NextEntries->clear();
607 if (Entries->empty())
609 if (Entries->size() == 1) {
610 Block *Curr = *(Entries->begin());
611 if (Curr->BranchesIn.empty()) {
612 // One entry, no looping ==> Simple
613 Make(MakeSimple(Blocks, Curr, *NextEntries));
614 if (NextEntries->empty())
618 // One entry, looping ==> Loop
619 Make(MakeLoop(Blocks, *Entries, *NextEntries));
620 if (NextEntries->empty())
625 // More than one entry, try to eliminate through a Multiple groups of
626 // independent blocks from an entry/ies. It is important to remove
627 // through multiples as opposed to looping since the former is more
629 BlockBlockSetMap IndependentGroups;
630 FindIndependentGroups(*Entries, IndependentGroups);
632 if (!IndependentGroups.empty()) {
633 // We can handle a group in a multiple if its entry cannot be reached
635 // Note that it might be reachable by itself - a loop. But that is
636 // fine, we will create a loop inside the multiple block (which
637 // is the performant order to do it).
638 for (auto iter = IndependentGroups.begin();
639 iter != IndependentGroups.end();) {
640 Block *Entry = iter->first;
641 BlockSet &Group = iter->second;
642 auto curr = iter++; // iterate carefully, we may delete
643 for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
644 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
645 Block *Origin = *iterBranch;
646 if (!contains(Group, Origin)) {
647 // Reached from outside the group, so we cannot handle this
648 IndependentGroups.erase(curr);
654 // As an optimization, if we have 2 independent groups, and one is a
655 // small dead end, we can handle only that dead end.
656 // The other then becomes a Next - without nesting in the code and
657 // recursion in the analysis.
658 // TODO: if the larger is the only dead end, handle that too
659 // TODO: handle >2 groups
660 // TODO: handle not just dead ends, but also that do not branch to the
661 // NextEntries. However, must be careful there since we create a
662 // Next, and that Next can prevent eliminating a break (since we no
663 // longer naturally reach the same place), which may necessitate a
664 // one-time loop, which makes the unnesting pointless.
665 if (IndependentGroups.size() == 2) {
666 // Find the smaller one
667 auto iter = IndependentGroups.begin();
668 Block *SmallEntry = iter->first;
669 auto SmallSize = iter->second.size();
671 Block *LargeEntry = iter->first;
672 auto LargeSize = iter->second.size();
673 if (SmallSize != LargeSize) { // ignore the case where they are
674 // identical - keep things symmetrical
676 if (SmallSize > LargeSize) {
677 Block *Temp = SmallEntry;
678 SmallEntry = LargeEntry;
679 LargeEntry = Temp; // Note: we did not flip the Sizes too, they
680 // are now invalid. TODO: use the smaller
685 BlockSet &SmallGroup = IndependentGroups[SmallEntry];
686 for (const auto &Curr : SmallGroup) {
687 for (const auto &iter : Curr->BranchesOut) {
688 Block *Target = iter.first;
689 if (!contains(SmallGroup, Target)) {
698 IndependentGroups.erase(LargeEntry);
702 if (!IndependentGroups.empty())
703 // Some groups removable ==> Multiple
704 Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
706 if (NextEntries->empty())
710 // No independent groups, must be loopable ==> Loop
711 Make(MakeLoop(Blocks, *Entries, *NextEntries));
712 if (NextEntries->empty())
722 for (const auto &Curr : Pre.Live) {
723 AllBlocks.insert(Curr);
727 Entries.insert(Entry);
728 Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
732 /// Relooper post-optimizer
734 struct PostOptimizer {
735 RelooperAlgorithm *Parent;
736 std::stack<Shape *> LoopStack;
738 PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
740 void ShapeSwitch(Shape* var,
741 std::function<void (SimpleShape*)> simple,
742 std::function<void (MultipleShape*)> multiple,
743 std::function<void (LoopShape*)> loop) {
744 switch (var->getKind()) {
745 case Shape::SK_Simple: {
746 simple(cast<SimpleShape>(var));
749 case Shape::SK_Multiple: {
750 multiple(cast<MultipleShape>(var));
753 case Shape::SK_Loop: {
754 loop(cast<LoopShape>(var));
760 // Find the blocks that natural control flow can get us directly to, or
761 // through a multiple that we ignore
762 void FollowNaturalFlow(Shape *S, BlockSet &Out) {
763 ShapeSwitch(S, [&](SimpleShape* Simple) {
764 Out.insert(Simple->Inner);
765 }, [&](MultipleShape* Multiple) {
766 for (const auto &iter : Multiple->InnerMap) {
767 FollowNaturalFlow(iter.second, Out);
769 FollowNaturalFlow(Multiple->Next, Out);
770 }, [&](LoopShape* Loop) {
771 FollowNaturalFlow(Loop->Inner, Out);
775 void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
777 Root->Natural = Root->Next;
778 FindNaturals(Root->Next, Otherwise);
780 Root->Natural = Otherwise;
783 ShapeSwitch(Root, [](SimpleShape* Simple) {
784 }, [&](MultipleShape* Multiple) {
785 for (const auto &iter : Multiple->InnerMap) {
786 FindNaturals(iter.second, Root->Natural);
788 }, [&](LoopShape* Loop){
789 FindNaturals(Loop->Inner, Loop->Inner);
793 // Remove unneeded breaks and continues.
794 // A flow operation is trivially unneeded if the shape we naturally get to
795 // by normal code execution is the same as the flow forces us to.
796 void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
797 LoopShape *LastLoop = nullptr,
798 unsigned Depth = 0) {
799 BlockSet NaturalBlocks;
800 FollowNaturalFlow(Natural, NaturalBlocks);
807 [&](SimpleShape* Simple) {
808 if (Simple->Inner->BranchVar)
810 nullptr; // a switch clears out the loop (TODO: only for
811 // breaks, not continue)
814 if (!Simple->Inner->BranchVar &&
815 Simple->Inner->ProcessedBranchesOut.size() == 2 &&
816 Depth < RelooperNestingLimit) {
817 // If there is a next block, we already know at Simple
818 // creation time to make direct branches, and we can do
819 // nothing more in general. But, we try to optimize the
820 // case of a break and a direct: This would normally be
821 // if (break?) { break; } ..
822 // but if we make sure to nest the else, we can save the
824 // if (!break?) { .. }
825 // This is also better because the more canonical nested
826 // form is easier to further optimize later. The
827 // downside is more nesting, which adds to size in builds with
829 // Note that we avoid switches, as it complicates control flow
830 // and is not relevant for the common case we optimize here.
833 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
834 Block *Target = iter.first;
835 Branch *Details = iter.second.get();
836 if (Details->Type == Branch::Break) {
838 if (!contains(NaturalBlocks, Target))
840 } else if (Details->Type != Branch::Direct)
843 if (Found && !Abort) {
844 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
845 Branch *Details = iter.second.get();
846 if (Details->Type == Branch::Break) {
847 Details->Type = Branch::Direct;
848 if (MultipleShape *Multiple =
849 dyn_cast<MultipleShape>(Details->Ancestor))
852 assert(Details->Type == Branch::Direct);
853 Details->Type = Branch::Nested;
857 Depth++; // this optimization increases depth, for us and all
858 // our next chain (i.e., until this call returns)
862 // If there is no next then Natural is where we will
863 // go to by doing nothing, so we can potentially optimize some
864 // branches to direct.
865 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
866 Block *Target = iter.first;
867 Branch *Details = iter.second.get();
868 if (Details->Type != Branch::Direct &&
869 contains(NaturalBlocks,
870 Target)) { // note: cannot handle split blocks
871 Details->Type = Branch::Direct;
872 if (MultipleShape *Multiple =
873 dyn_cast<MultipleShape>(Details->Ancestor))
875 } else if (Details->Type == Branch::Break && LastLoop &&
876 LastLoop->Natural == Details->Ancestor->Natural) {
877 // it is important to simplify breaks, as simpler breaks
878 // enable other optimizations
879 Details->Labeled = false;
880 if (MultipleShape *Multiple =
881 dyn_cast<MultipleShape>(Details->Ancestor))
886 }, [&](MultipleShape* Multiple)
888 for (const auto &iter : Multiple->InnerMap) {
889 RemoveUnneededFlows(iter.second, Multiple->Next,
890 Multiple->Breaks ? nullptr : LastLoop,
893 Next = Multiple->Next;
894 }, [&](LoopShape* Loop)
896 RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
902 // After we know which loops exist, we can calculate which need to be
904 void FindLabeledLoops(Shape *Root) {
912 [&](SimpleShape *Simple) {
913 MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
914 // If we are fusing a Multiple with a loop into this Simple, then
916 if (Fused && Fused->Breaks)
917 LoopStack.push(Fused);
918 if (Simple->Inner->BranchVar)
919 LoopStack.push(nullptr); // a switch means breaks are now useless,
922 if (Fused->UseSwitch)
923 LoopStack.push(nullptr); // a switch means breaks are now
924 // useless, push a dummy
925 for (const auto &iter : Fused->InnerMap) {
926 FindLabeledLoops(iter.second);
929 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
930 Branch *Details = iter.second.get();
931 if (Details->Type == Branch::Break ||
932 Details->Type == Branch::Continue) {
933 assert(!LoopStack.empty());
934 if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
935 if (MultipleShape *Multiple =
936 dyn_cast<MultipleShape>(Details->Ancestor)) {
937 Multiple->Labeled = true;
939 LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
940 Loop->Labeled = true;
943 Details->Labeled = false;
946 if (Fused && Fused->UseSwitch)
948 if (Simple->Inner->BranchVar)
950 if (Fused && Fused->Breaks)
958 , [&](MultipleShape* Multiple) {
959 if (Multiple->Breaks)
960 LoopStack.push(Multiple);
961 for (const auto &iter : Multiple->InnerMap)
962 FindLabeledLoops(iter.second);
963 if (Multiple->Breaks)
967 , [&](LoopShape* Loop) {
968 LoopStack.push(Loop);
969 FindLabeledLoops(Loop->Inner);
976 void Process(Shape * Root) {
978 RemoveUnneededFlows(Root);
979 FindLabeledLoops(Root);
983 PostOptimizer(this).Process(Root);