1 //===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===---------------------------------------------------------------------===//
11 /// \brief This implements the Relooper algorithm. This implementation includes
12 /// optimizations added since the original academic paper [1] was published.
14 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
15 /// Proceedings of the ACM international conference companion on Object
16 /// oriented programming systems languages and applications companion
17 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
18 /// http://doi.acm.org/10.1145/2048147.2048224
20 //===-------------------------------------------------------------------===//
23 #include "WebAssembly.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
40 #define DEBUG_TYPE "relooper"
43 using namespace Relooper;
// Command-line knobs tuning the relooper's output; each is consulted by the
// algorithm below.

// Split-node discount: a multiple-entry, no-exit block is duplicated per
// predecessor only while its code size times the extra copies stays under
// TotalCodeSize / RelooperSplittingFactor (see SplitDeadEnds). Larger values
// mean less splitting.
static cl::opt<int> RelooperSplittingFactor(
    "relooper-splitting-factor",
    "How much to discount code size when deciding whether to split a node"),

// Once a multiple shape has at least this many inner entries it is emitted
// as a switch rather than an if-chain (see MakeMultiple).
static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
    "relooper-multiple-switch-threshold",
    "How many entries to allow in a multiple before we use a switch"),

// Depth limit past which RemoveUnneededFlows stops applying its
// nesting-increasing break-to-direct optimization.
static cl::opt<unsigned> RelooperNestingLimit(
    "relooper-nesting-limit",
    "How much nesting is acceptable"),
/// Implements the relooper algorithm for a function's blocks.
/// Implementation details: The Relooper instance has
/// ownership of the blocks and shapes, and frees them when done.
struct RelooperAlgorithm {
  // Every Block/Shape handed to (or created by) the algorithm; owned here
  // and released in the destructor.
  std::deque<Block *> Blocks;
  std::deque<Shape *> Shapes;

  // Takes ownership of New; Id == -1 means "assign the next free block id",
  // otherwise the given id is reused (used when cloning split blocks).
  void AddBlock(Block *New, int Id = -1);

  // Calculates the shapes
  void Calculate(Block *Entry);

  // Sets us to try to minimize size
  // (MinSize is a member flag declared elsewhere in this struct.)
  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
// Legacy FunctionPass wrapper that runs the relooper over each function;
// created via createWebAssemblyRelooper() below.
struct RelooperAnalysis final : public FunctionPass {
  // NOTE(review): the static pass `ID` member referenced here is declared
  // outside this excerpt; its definition (RelooperAnalysis::ID) follows.
  RelooperAnalysis() : FunctionPass(ID) {}
  const char *getPassName() const override { return "relooper"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
  bool runOnFunction(Function &F) override;
char RelooperAnalysis::ID = 0;
// Factory used by the WebAssembly target to instantiate the pass.
FunctionPass *llvm::createWebAssemblyRelooper() {
  return new RelooperAnalysis();
// Mirrors the function's CFG into relooper Blocks, wires up the edges, and
// runs the relooper. Returns false: this is an analysis, nothing is changed.
bool RelooperAnalysis::runOnFunction(Function &F) {
  DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
  // Bidirectional BasicBlock <-> Block mapping, so edges can be translated.
  // FIXME: remove duplication between relooper's and LLVM's BBs.
  std::map<const BasicBlock *, Block *> BB2B;
  std::map<const Block *, const BasicBlock *> B2BB;
  for (const BasicBlock &BB : F) {
    // FIXME: getName is wrong here, Code is meant to represent amount of code.
    // FIXME: use BranchVarInit for switch.
    Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
    assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
    assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
  // All Blocks exist now; translate each CFG successor edge into a branch.
  for (Block *B : R.Blocks) {
    const BasicBlock *BB = B2BB[B];
    for (const BasicBlock *Successor : successors(BB))
      // FIXME: add branch's Condition and Code below.
      B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
  // Run the algorithm rooted at the function's entry block.
  R.Calculate(BB2B[&F.getEntryBlock()]);
  return false; // Analysis passes don't modify anything.
137 typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
138 typedef std::list<Block *> BlockList;
// Generic membership test for associative containers (set/map-like):
// true iff `Needle` is present in `Haystack`.
template <class ContainerT, class KeyT>
static bool contains(const ContainerT &Haystack, const KeyT &Needle) {
  return Haystack.count(Needle) != 0;
}
// Branch: an out-edge of a Block. Duplicates (strdup) the condition and code
// C-strings so the Branch owns its own copies; either argument may be null.
Branch::Branch(const char *ConditionInit, const char *CodeInit)
    : Ancestor(nullptr), Labeled(true) {
  // FIXME: move from char* to LLVM data structures
  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
  Code = CodeInit ? strdup(CodeInit) : nullptr;
  // Destructor body: release the strings strdup'd by the constructor.
  // free(nullptr) is a no-op, so null fields are safe.
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Condition)));
  free(static_cast<void *>(const_cast<char *>(Code)));
// Block: one relooper basic block. Code is strdup'd unconditionally, so
// CodeInit must be non-null; BranchVarInit (switch variable name) may be null.
Block::Block(const char *CodeInit, const char *BranchVarInit)
    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
  // FIXME: move from char* to LLVM data structures
  Code = strdup(CodeInit);
  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
  // Destructor body: free the strings duplicated in the constructor
  // (BranchVar may be null; free(nullptr) is harmless).
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Code)));
  free(static_cast<void *>(const_cast<char *>(BranchVar)));
// Records an out-edge from this block to Target. At most one branch per
// target is allowed; Branch's constructor copies the Condition/Code strings.
void Block::AddBranchTo(Block *Target, const char *Condition,
  assert(!contains(BranchesOut, Target) &&
         "cannot add more than one branch to the same target");
  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
// Counters start so that shape ids begin at 0 and block ids at 1.
RelooperAlgorithm::RelooperAlgorithm()
    : Root(nullptr), MinSize(false), BlockIdCounter(1),
      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
// The algorithm owns every Block and Shape registered with it (see
// AddBlock/Notice); release them all here.
RelooperAlgorithm::~RelooperAlgorithm() {
  for (auto Curr : Blocks)
  for (auto Curr : Shapes)
// Takes ownership of New. Id == -1 assigns the next free id; otherwise the
// caller-provided id is reused (SplitDeadEnds passes the original block's id
// to its clones).
void RelooperAlgorithm::AddBlock(Block *New, int Id) {
  New->Id = Id == -1 ? BlockIdCounter++ : Id;
  Blocks.push_back(New);
// Common base for the helper structs defined inside Calculate(): carries a
// non-owning pointer back to the algorithm instance being operated on.
struct RelooperRecursor {
  RelooperAlgorithm *Parent;
  RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
207 void RelooperAlgorithm::Calculate(Block *Entry) {
208 // Scan and optimize the input
209 struct PreOptimizer : public RelooperRecursor {
210 PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
213 void FindLive(Block *Root) {
214 BlockList ToInvestigate;
215 ToInvestigate.push_back(Root);
216 while (!ToInvestigate.empty()) {
217 Block *Curr = ToInvestigate.front();
218 ToInvestigate.pop_front();
219 if (contains(Live, Curr))
222 for (const auto &iter : Curr->BranchesOut)
223 ToInvestigate.push_back(iter.first);
227 // If a block has multiple entries but no exits, and it is small enough, it
228 // is useful to split it. A common example is a C++ function where
229 // everything ends up at a final exit block and does some RAII cleanup.
230 // Without splitting, we will be forced to introduce labelled loops to
231 // allow reaching the final block
232 void SplitDeadEnds() {
233 unsigned TotalCodeSize = 0;
234 for (const auto &Curr : Live) {
235 TotalCodeSize += strlen(Curr->Code);
239 for (const auto &Original : Live) {
240 if (Original->BranchesIn.size() <= 1 ||
241 !Original->BranchesOut.empty())
242 continue; // only dead ends, for now
243 if (contains(Original->BranchesOut, Original))
244 continue; // cannot split a looping node
245 if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
246 TotalCodeSize / RelooperSplittingFactor)
247 continue; // if splitting increases raw code size by a significant
249 // Split the node (for simplicity, we replace all the blocks, even
250 // though we could have reused the original)
251 DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
252 for (const auto &Prior : Original->BranchesIn) {
253 Block *Split = new Block(Original->Code, Original->BranchVar);
254 Parent->AddBlock(Split, Original->Id);
255 Split->BranchesIn.insert(Prior);
256 std::unique_ptr<Branch> Details;
257 Details.swap(Prior->BranchesOut[Original]);
258 Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
260 for (const auto &iter : Original->BranchesOut) {
261 Block *Post = iter.first;
262 Branch *Details = iter.second.get();
263 Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
265 Post->BranchesIn.insert(Split);
267 Splits.insert(Split);
268 Removed.insert(Original);
270 for (const auto &iter : Original->BranchesOut) {
271 Block *Post = iter.first;
272 Post->BranchesIn.remove(Original);
275 for (const auto &iter : Splits)
277 for (const auto &iter : Removed)
281 PreOptimizer Pre(this);
284 // Add incoming branches from live blocks, ignoring dead code
285 for (unsigned i = 0; i < Blocks.size(); i++) {
286 Block *Curr = Blocks[i];
287 if (!contains(Pre.Live, Curr))
289 for (const auto &iter : Curr->BranchesOut)
290 iter.first->BranchesIn.insert(Curr);
296 // Recursively process the graph
298 struct Analyzer : public RelooperRecursor {
299 Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
301 // Add a shape to the list of shapes in this Relooper calculation
302 void Notice(Shape *New) {
303 New->Id = Parent->ShapeIdCounter++;
304 Parent->Shapes.push_back(New);
307 // Create a list of entries from a block. If LimitTo is provided, only
308 // results in that set will appear
309 void GetBlocksOut(Block *Source, BlockSet &Entries,
310 BlockSet *LimitTo = nullptr) {
311 for (const auto &iter : Source->BranchesOut)
312 if (!LimitTo || contains(*LimitTo, iter.first))
313 Entries.insert(iter.first);
316 // Converts/processes all branchings to a specific target
317 void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
319 DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
321 for (auto iter = Target->BranchesIn.begin();
322 iter != Target->BranchesIn.end();) {
323 Block *Prior = *iter;
324 if (!contains(From, Prior)) {
328 std::unique_ptr<Branch> PriorOut;
329 PriorOut.swap(Prior->BranchesOut[Target]);
330 PriorOut->Ancestor = Ancestor;
331 PriorOut->Type = Type;
332 if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
333 Multiple->Breaks++; // We are breaking out of this Multiple, so need a
335 iter++; // carefully increment iter before erasing
336 Target->BranchesIn.remove(Prior);
337 Target->ProcessedBranchesIn.insert(Prior);
338 Prior->ProcessedBranchesOut[Target].swap(PriorOut);
342 Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
343 DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
344 SimpleShape *Simple = new SimpleShape;
346 Simple->Inner = Inner;
347 Inner->Parent = Simple;
348 if (Blocks.size() > 1) {
349 Blocks.remove(Inner);
350 GetBlocksOut(Inner, NextEntries, &Blocks);
352 JustInner.insert(Inner);
353 for (const auto &iter : NextEntries)
354 Solipsize(iter, Branch::Direct, Simple, JustInner);
359 Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
360 BlockSet &NextEntries) {
361 // Find the inner blocks in this loop. Proceed backwards from the entries
363 // you reach a seen block, collecting as you go.
364 BlockSet InnerBlocks;
365 BlockSet Queue = Entries;
366 while (!Queue.empty()) {
367 Block *Curr = *(Queue.begin());
368 Queue.remove(*Queue.begin());
369 if (!contains(InnerBlocks, Curr)) {
370 // This element is new, mark it as inner and remove from outer
371 InnerBlocks.insert(Curr);
373 // Add the elements prior to it
374 for (const auto &iter : Curr->BranchesIn)
378 assert(!InnerBlocks.empty());
380 for (const auto &Curr : InnerBlocks) {
381 for (const auto &iter : Curr->BranchesOut) {
382 Block *Possible = iter.first;
383 if (!contains(InnerBlocks, Possible))
384 NextEntries.insert(Possible);
388 LoopShape *Loop = new LoopShape();
391 // Solipsize the loop, replacing with break/continue and marking branches
392 // as Processed (will not affect later calculations)
393 // A. Branches to the loop entries become a continue to this shape
394 for (const auto &iter : Entries)
395 Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
396 // B. Branches to outside the loop (a next entry) become breaks on this
398 for (const auto &iter : NextEntries)
399 Solipsize(iter, Branch::Break, Loop, InnerBlocks);
401 Shape *Inner = Process(InnerBlocks, Entries, nullptr);
406 // For each entry, find the independent group reachable by it. The
407 // independent group is the entry itself, plus all the blocks it can
408 // reach that cannot be directly reached by another entry. Note that we
409 // ignore directly reaching the entry itself by another entry.
410 // @param Ignore - previous blocks that are irrelevant
411 void FindIndependentGroups(BlockSet &Entries,
412 BlockBlockSetMap &IndependentGroups,
413 BlockSet *Ignore = nullptr) {
414 typedef std::map<Block *, Block *> BlockBlockMap;
417 BlockBlockSetMap &IndependentGroups;
418 BlockBlockMap Ownership; // For each block, which entry it belongs to.
419 // We have reached it from there.
421 HelperClass(BlockBlockSetMap &IndependentGroupsInit)
422 : IndependentGroups(IndependentGroupsInit) {}
423 void InvalidateWithChildren(Block *New) {
424 // Being in the list means you need to be invalidated
425 BlockList ToInvalidate;
426 ToInvalidate.push_back(New);
427 while (!ToInvalidate.empty()) {
428 Block *Invalidatee = ToInvalidate.front();
429 ToInvalidate.pop_front();
430 Block *Owner = Ownership[Invalidatee];
431 // Owner may have been invalidated, do not add to
432 // IndependentGroups!
433 if (contains(IndependentGroups, Owner))
434 IndependentGroups[Owner].remove(Invalidatee);
435 if (Ownership[Invalidatee]) { // may have been seen before and
436 // invalidated already
437 Ownership[Invalidatee] = nullptr;
438 for (const auto &iter : Invalidatee->BranchesOut) {
439 Block *Target = iter.first;
440 BlockBlockMap::iterator Known = Ownership.find(Target);
441 if (Known != Ownership.end()) {
442 Block *TargetOwner = Known->second;
444 ToInvalidate.push_back(Target);
451 HelperClass Helper(IndependentGroups);
453 // We flow out from each of the entries, simultaneously.
454 // When we reach a new block, we add it as belonging to the one we got to
456 // If we reach a new block that is already marked as belonging to someone,
457 // it is reachable by two entries and is not valid for any of them.
458 // Remove it and all it can reach that have been visited.
460 // Being in the queue means we just added this item, and
461 // we need to add its children
463 for (const auto &Entry : Entries) {
464 Helper.Ownership[Entry] = Entry;
465 IndependentGroups[Entry].insert(Entry);
466 Queue.push_back(Entry);
468 while (!Queue.empty()) {
469 Block *Curr = Queue.front();
471 Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
472 // map if we are in the queue
474 continue; // we have been invalidated meanwhile after being reached
477 for (const auto &iter : Curr->BranchesOut) {
478 Block *New = iter.first;
479 BlockBlockMap::iterator Known = Helper.Ownership.find(New);
480 if (Known == Helper.Ownership.end()) {
481 // New node. Add it, and put it in the queue
482 Helper.Ownership[New] = Owner;
483 IndependentGroups[Owner].insert(New);
484 Queue.push_back(New);
487 Block *NewOwner = Known->second;
489 continue; // We reached an invalidated node
490 if (NewOwner != Owner)
491 // Invalidate this and all reachable that we have seen - we reached
492 // this from two locations
493 Helper.InvalidateWithChildren(New);
494 // otherwise, we have the same owner, so do nothing
498 // Having processed all the interesting blocks, we remain with just one
500 // If a->b, and a was invalidated, but then b was later reached by
501 // someone else, we must invalidate b. To check for this, we go over all
502 // elements in the independent groups, if an element has a parent which
503 // does *not* have the same owner, we/ must remove it and all its
506 for (const auto &iter : Entries) {
507 BlockSet &CurrGroup = IndependentGroups[iter];
508 BlockList ToInvalidate;
509 for (const auto &iter : CurrGroup) {
511 for (const auto &iter : Child->BranchesIn) {
512 Block *Parent = iter;
513 if (Ignore && contains(*Ignore, Parent))
515 if (Helper.Ownership[Parent] != Helper.Ownership[Child])
516 ToInvalidate.push_back(Child);
519 while (!ToInvalidate.empty()) {
520 Block *Invalidatee = ToInvalidate.front();
521 ToInvalidate.pop_front();
522 Helper.InvalidateWithChildren(Invalidatee);
526 // Remove empty groups
527 for (const auto &iter : Entries)
528 if (IndependentGroups[iter].empty())
529 IndependentGroups.erase(iter);
532 Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
533 BlockBlockSetMap &IndependentGroups, Shape *Prev,
534 BlockSet &NextEntries) {
535 bool Fused = isa<SimpleShape>(Prev);
536 MultipleShape *Multiple = new MultipleShape();
538 BlockSet CurrEntries;
539 for (auto &iter : IndependentGroups) {
540 Block *CurrEntry = iter.first;
541 BlockSet &CurrBlocks = iter.second;
542 // Create inner block
544 CurrEntries.insert(CurrEntry);
545 for (const auto &CurrInner : CurrBlocks) {
546 // Remove the block from the remaining blocks
547 Blocks.remove(CurrInner);
548 // Find new next entries and fix branches to them
549 for (auto iter = CurrInner->BranchesOut.begin();
550 iter != CurrInner->BranchesOut.end();) {
551 Block *CurrTarget = iter->first;
554 if (!contains(CurrBlocks, CurrTarget)) {
555 NextEntries.insert(CurrTarget);
556 Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
558 iter = Next; // increment carefully because Solipsize can remove us
561 Multiple->InnerMap[CurrEntry->Id] =
562 Process(CurrBlocks, CurrEntries, nullptr);
563 // If we are not fused, then our entries will actually be checked
565 CurrEntry->IsCheckedMultipleEntry = true;
567 // Add entries not handled as next entries, they are deferred
568 for (const auto &Entry : Entries)
569 if (!contains(IndependentGroups, Entry))
570 NextEntries.insert(Entry);
571 // The multiple has been created, we can decide how to implement it
572 if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
573 Multiple->UseSwitch = true;
574 Multiple->Breaks++; // switch captures breaks
580 // Process a set of blocks with specified entries, returns a shape
581 // The Make* functions receive a NextEntries. If they fill it with data,
582 // those are the entries for the ->Next block on them, and the blocks
583 // are what remains in Blocks (which Make* modify). In this way
584 // we avoid recursing on Next (imagine a long chain of Simples, if we
585 // recursed we could blow the stack).
586 Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
587 BlockSet *Entries = &InitialEntries;
588 BlockSet TempEntries[2];
589 int CurrTempIndex = 0;
590 BlockSet *NextEntries;
591 Shape *Ret = nullptr;
593 auto Make = [&](Shape *Temp) {
599 Entries = NextEntries;
603 CurrTempIndex = 1 - CurrTempIndex;
604 NextEntries = &TempEntries[CurrTempIndex];
605 NextEntries->clear();
607 if (Entries->empty())
609 if (Entries->size() == 1) {
610 Block *Curr = *(Entries->begin());
611 if (Curr->BranchesIn.empty()) {
612 // One entry, no looping ==> Simple
613 Make(MakeSimple(Blocks, Curr, *NextEntries));
614 if (NextEntries->empty())
618 // One entry, looping ==> Loop
619 Make(MakeLoop(Blocks, *Entries, *NextEntries));
620 if (NextEntries->empty())
625 // More than one entry, try to eliminate through a Multiple groups of
626 // independent blocks from an entry/ies. It is important to remove
627 // through multiples as opposed to looping since the former is more
629 BlockBlockSetMap IndependentGroups;
630 FindIndependentGroups(*Entries, IndependentGroups);
632 if (!IndependentGroups.empty()) {
633 // We can handle a group in a multiple if its entry cannot be reached
635 // Note that it might be reachable by itself - a loop. But that is
636 // fine, we will create a loop inside the multiple block (which
637 // is the performant order to do it).
638 for (auto iter = IndependentGroups.begin();
639 iter != IndependentGroups.end();) {
640 Block *Entry = iter->first;
641 BlockSet &Group = iter->second;
642 auto curr = iter++; // iterate carefully, we may delete
643 for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
644 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
645 Block *Origin = *iterBranch;
646 if (!contains(Group, Origin)) {
647 // Reached from outside the group, so we cannot handle this
648 IndependentGroups.erase(curr);
654 // As an optimization, if we have 2 independent groups, and one is a
655 // small dead end, we can handle only that dead end.
656 // The other then becomes a Next - without nesting in the code and
657 // recursion in the analysis.
658 // TODO: if the larger is the only dead end, handle that too
659 // TODO: handle >2 groups
660 // TODO: handle not just dead ends, but also that do not branch to the
661 // NextEntries. However, must be careful there since we create a
662 // Next, and that Next can prevent eliminating a break (since we no
663 // longer naturally reach the same place), which may necessitate a
664 // one-time loop, which makes the unnesting pointless.
665 if (IndependentGroups.size() == 2) {
666 // Find the smaller one
667 auto iter = IndependentGroups.begin();
668 Block *SmallEntry = iter->first;
669 auto SmallSize = iter->second.size();
671 Block *LargeEntry = iter->first;
672 auto LargeSize = iter->second.size();
673 if (SmallSize != LargeSize) { // ignore the case where they are
674 // identical - keep things symmetrical
676 if (SmallSize > LargeSize) {
677 Block *Temp = SmallEntry;
678 SmallEntry = LargeEntry;
679 LargeEntry = Temp; // Note: we did not flip the Sizes too, they
680 // are now invalid. TODO: use the smaller
685 BlockSet &SmallGroup = IndependentGroups[SmallEntry];
686 for (const auto &Curr : SmallGroup) {
687 for (const auto &iter : Curr->BranchesOut) {
688 Block *Target = iter.first;
689 if (!contains(SmallGroup, Target)) {
698 IndependentGroups.erase(LargeEntry);
702 if (!IndependentGroups.empty())
703 // Some groups removable ==> Multiple
704 Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
706 if (NextEntries->empty())
710 // No independent groups, must be loopable ==> Loop
711 Make(MakeLoop(Blocks, *Entries, *NextEntries));
712 if (NextEntries->empty())
722 for (const auto &Curr : Pre.Live) {
723 AllBlocks.insert(Curr);
727 Entries.insert(Entry);
728 Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
732 /// Relooper post-optimizer
734 struct PostOptimizer {
735 RelooperAlgorithm *Parent;
736 std::stack<Shape *> LoopStack;
738 PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
740 void ShapeSwitch(Shape* var,
741 std::function<void (SimpleShape*)> simple,
742 std::function<void (MultipleShape*)> multiple,
743 std::function<void (LoopShape*)> loop) {
744 switch (var->getKind()) {
745 case Shape::SK_Simple: {
746 simple(cast<SimpleShape>(var));
749 case Shape::SK_Multiple: {
750 multiple(cast<MultipleShape>(var));
753 case Shape::SK_Loop: {
754 loop(cast<LoopShape>(var));
760 // Find the blocks that natural control flow can get us directly to, or
761 // through a multiple that we ignore
762 void FollowNaturalFlow(Shape *S, BlockSet &Out) {
763 ShapeSwitch(S, [&](SimpleShape* Simple) {
764 Out.insert(Simple->Inner);
765 }, [&](MultipleShape* Multiple) {
766 for (const auto &iter : Multiple->InnerMap) {
767 FollowNaturalFlow(iter.second, Out);
769 FollowNaturalFlow(Multiple->Next, Out);
770 }, [&](LoopShape* Loop) {
771 FollowNaturalFlow(Loop->Inner, Out);
775 void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
777 Root->Natural = Root->Next;
778 FindNaturals(Root->Next, Otherwise);
780 Root->Natural = Otherwise;
783 ShapeSwitch(Root, [](SimpleShape* Simple) {
784 }, [&](MultipleShape* Multiple) {
785 for (const auto &iter : Multiple->InnerMap) {
786 FindNaturals(iter.second, Root->Natural);
788 }, [&](LoopShape* Loop){
789 FindNaturals(Loop->Inner, Loop->Inner);
793 // Remove unneeded breaks and continues.
794 // A flow operation is trivially unneeded if the shape we naturally get to
795 // by normal code execution is the same as the flow forces us to.
796 void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
797 LoopShape *LastLoop = nullptr,
798 unsigned Depth = 0) {
799 BlockSet NaturalBlocks;
800 FollowNaturalFlow(Natural, NaturalBlocks);
807 [&](SimpleShape* Simple) {
808 if (Simple->Inner->BranchVar)
810 nullptr; // a switch clears out the loop (TODO: only for
811 // breaks, not continue)
814 if (!Simple->Inner->BranchVar &&
815 Simple->Inner->ProcessedBranchesOut.size() == 2 &&
816 Depth < RelooperNestingLimit) {
817 // If there is a next block, we already know at Simple
818 // creation time to make direct branches, and we can do
819 // nothing more in general. But, we try to optimize the
820 // case of a break and a direct: This would normally be
821 // if (break?) { break; } ..
822 // but if we make sure to nest the else, we can save the
824 // if (!break?) { .. }
825 // This is also better because the more canonical nested
826 // form is easier to further optimize later. The
827 // downside is more nesting, which adds to size in builds with
829 // Note that we avoid switches, as it complicates control flow
830 // and is not relevant for the common case we optimize here.
833 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
834 Block *Target = iter.first;
835 Branch *Details = iter.second.get();
836 if (Details->Type == Branch::Break) {
838 if (!contains(NaturalBlocks, Target))
840 } else if (Details->Type != Branch::Direct)
843 if (Found && !Abort) {
844 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
845 Branch *Details = iter.second.get();
846 if (Details->Type == Branch::Break) {
847 Details->Type = Branch::Direct;
848 if (MultipleShape *Multiple =
849 dyn_cast<MultipleShape>(Details->Ancestor))
852 assert(Details->Type == Branch::Direct);
853 Details->Type = Branch::Nested;
857 Depth++; // this optimization increases depth, for us and all
858 // our next chain (i.e., until this call returns)
862 // If there is no next then Natural is where we will
863 // go to by doing nothing, so we can potentially optimize some
864 // branches to direct.
865 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
866 Block *Target = iter.first;
867 Branch *Details = iter.second.get();
868 if (Details->Type != Branch::Direct &&
869 contains(NaturalBlocks,
870 Target)) { // note: cannot handle split blocks
871 Details->Type = Branch::Direct;
872 if (MultipleShape *Multiple =
873 dyn_cast<MultipleShape>(Details->Ancestor))
875 } else if (Details->Type == Branch::Break && LastLoop &&
876 LastLoop->Natural == Details->Ancestor->Natural) {
877 // it is important to simplify breaks, as simpler breaks
878 // enable other optimizations
879 Details->Labeled = false;
880 if (MultipleShape *Multiple =
881 dyn_cast<MultipleShape>(Details->Ancestor))
886 }, [&](MultipleShape* Multiple)
888 for (const auto &iter : Multiple->InnerMap) {
889 RemoveUnneededFlows(iter.second, Multiple->Next,
890 Multiple->Breaks ? nullptr : LastLoop,
893 Next = Multiple->Next;
894 }, [&](LoopShape* Loop)
896 RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
902 // After we know which loops exist, we can calculate which need to be
904 void FindLabeledLoops(Shape *Root) {
912 [&](SimpleShape *Simple) {
913 MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
914 // If we are fusing a Multiple with a loop into this Simple, then
916 if (Fused && Fused->Breaks)
917 LoopStack.push(Fused);
918 if (Simple->Inner->BranchVar)
919 LoopStack.push(nullptr); // a switch means breaks are now useless,
922 if (Fused->UseSwitch)
923 LoopStack.push(nullptr); // a switch means breaks are now
924 // useless, push a dummy
925 for (const auto &iter : Fused->InnerMap) {
926 FindLabeledLoops(iter.second);
929 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
930 Branch *Details = iter.second.get();
931 if (Details->Type == Branch::Break ||
932 Details->Type == Branch::Continue) {
933 assert(!LoopStack.empty());
934 if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
935 if (MultipleShape *Multiple =
936 dyn_cast<MultipleShape>(Details->Ancestor)) {
937 Multiple->Labeled = true;
939 LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
940 Loop->Labeled = true;
943 Details->Labeled = false;
946 if (Fused && Fused->UseSwitch)
948 if (Simple->Inner->BranchVar)
950 if (Fused && Fused->Breaks)
958 , [&](MultipleShape* Multiple) {
959 if (Multiple->Breaks)
960 LoopStack.push(Multiple);
961 for (const auto &iter : Multiple->InnerMap)
962 FindLabeledLoops(iter.second);
963 if (Multiple->Breaks)
967 , [&](LoopShape* Loop) {
968 LoopStack.push(Loop);
969 FindLabeledLoops(Loop->Inner);
976 void Process(Shape * Root) {
978 RemoveUnneededFlows(Root);
979 FindLabeledLoops(Root);
983 PostOptimizer(this).Process(Root);