//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This implements the Relooper algorithm. This implementation includes
/// optimizations added since the original academic paper [1] was published.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object
/// oriented programming systems languages and applications companion
/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//
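
// The Relooper recovers structured control flow from an arbitrary CFG of
// Blocks. It emits a tree of Shapes: Simple (one block, then a Next shape),
// Multiple (independent groups selected by checked entry block), and Loop
// (a shape that control can continue back to); see MakeSimple, MakeMultiple
// and MakeLoop below.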

#include "Relooper.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <list>
#include <map>
#include <stack>

#define DEBUG_TYPE "relooper"

using namespace llvm;

namespace Relooper {

static cl::opt<int> RelooperSplittingFactor(
    "relooper-splitting-factor",
    cl::desc(
        "How much to discount code size when deciding whether to split a node"),
    cl::init(5));

static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
    "relooper-multiple-switch-threshold",
    cl::desc(
        "How many entries to allow in a multiple before we use a switch"),
    cl::init(10));

static cl::opt<unsigned> RelooperNestingLimit(
    "relooper-nesting-limit",
    cl::desc("How much nesting is acceptable"),
    cl::init(20));

template <class T, class U>
static bool contains(const T &container, const U &contained) {
  return container.count(contained);
}

// Branch

Branch::Branch(const char *ConditionInit, const char *CodeInit)
    : Ancestor(nullptr), Labeled(true) {
  // FIXME: move from char* to LLVM data structures
  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
  Code = CodeInit ? strdup(CodeInit) : nullptr;
}

Branch::~Branch() {
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Condition)));
  free(static_cast<void *>(const_cast<char *>(Code)));
}

// Block

Block::Block(const char *CodeInit, const char *BranchVarInit)
    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
  // FIXME: move from char* to LLVM data structures
  Code = strdup(CodeInit);
  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
}

Block::~Block() {
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Code)));
  free(static_cast<void *>(const_cast<char *>(BranchVar)));
}

void Block::AddBranchTo(Block *Target, const char *Condition,
                        const char *Code) {
  assert(!contains(BranchesOut,
                   Target)); // cannot add more than one branch to the same
                             // target
  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
}

// Relooper

Relooper::Relooper()
    : Root(nullptr), MinSize(false), BlockIdCounter(1),
      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
}

Relooper::~Relooper() {
  for (auto Curr : Blocks)
    delete Curr;
  for (auto Curr : Shapes)
    delete Curr;
}

void Relooper::AddBlock(Block *New, int Id) {
  New->Id = Id == -1 ? BlockIdCounter++ : Id;
  Blocks.push_back(New);
}

struct RelooperRecursor {
  Relooper *Parent;
  RelooperRecursor(Relooper *ParentInit) : Parent(ParentInit) {}
};

typedef std::list<Block *> BlockList;

void Relooper::Calculate(Block *Entry) {
  // Scan and optimize the input
  struct PreOptimizer : public RelooperRecursor {
    PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {}
    BlockSet Live;

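    // Mark every block reachable from Root as live, breadth-first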
    void FindLive(Block *Root) {
      BlockList ToInvestigate;
      ToInvestigate.push_back(Root);
      while (!ToInvestigate.empty()) {
        Block *Curr = ToInvestigate.front();
        ToInvestigate.pop_front();
        if (contains(Live, Curr))
          continue;
        Live.insert(Curr);
        for (const auto &iter : Curr->BranchesOut)
          ToInvestigate.push_back(iter.first);
      }
    }

    // If a block has multiple entries but no exits, and it is small enough,
    // it is useful to split it. A common example is a C++ function where
    // everything ends up at a final exit block and does some RAII cleanup.
    // Without splitting, we will be forced to introduce labelled loops to
    // allow reaching the final block.
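    // For example, if A and B both branch to a small dead-end block X, we
    // create copies X1 and X2 and retarget A to X1 and B to X2. Each copy
    // then has a single entry, so it nests naturally in the output.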
    void SplitDeadEnds() {
      unsigned TotalCodeSize = 0;
      for (const auto &Curr : Live) {
        TotalCodeSize += strlen(Curr->Code);
      }
      BlockSet Splits;
      BlockSet Removed;
      for (const auto &Original : Live) {
        if (Original->BranchesIn.size() <= 1 ||
            !Original->BranchesOut.empty())
          continue; // only dead ends, for now
        if (contains(Original->BranchesOut, Original))
          continue; // cannot split a looping node
        if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
            TotalCodeSize / RelooperSplittingFactor)
          continue; // if splitting increases raw code size by a significant
                    // amount, abort
        // Split the node (for simplicity, we replace all the blocks, even
        // though we could have reused the original)
        for (const auto &Prior : Original->BranchesIn) {
          Block *Split = new Block(Original->Code, Original->BranchVar);
          Parent->AddBlock(Split, Original->Id);
          Split->BranchesIn.insert(Prior);
          std::unique_ptr<Branch> Details;
          Details.swap(Prior->BranchesOut[Original]);
          Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
                                                          Details->Code);
          for (const auto &iter : Original->BranchesOut) {
            Block *Post = iter.first;
            Branch *Details = iter.second.get();
            Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
                                                           Details->Code);
            Post->BranchesIn.insert(Split);
          }
          Splits.insert(Split);
          Removed.insert(Original);
        }
        for (const auto &iter : Original->BranchesOut) {
          Block *Post = iter.first;
          Post->BranchesIn.remove(Original);
        }
      }
      for (const auto &iter : Splits)
        Live.insert(iter);
      for (const auto &iter : Removed)
        Live.remove(iter);
    }
  };
  PreOptimizer Pre(this);
  Pre.FindLive(Entry);

  // Add incoming branches from live blocks, ignoring dead code
  for (unsigned i = 0; i < Blocks.size(); i++) {
    Block *Curr = Blocks[i];
    if (!contains(Pre.Live, Curr))
      continue; // ignore dead blocks
    for (const auto &iter : Curr->BranchesOut)
      iter.first->BranchesIn.insert(Curr);
  }

  if (!MinSize)
    Pre.SplitDeadEnds();

  // Recursively process the graph

  struct Analyzer : public RelooperRecursor {
    Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {}

    // Add a shape to the list of shapes in this Relooper calculation
    void Notice(Shape *New) {
      New->Id = Parent->ShapeIdCounter++;
      Parent->Shapes.push_back(New);
    }

    // Create a list of entries from a block. If LimitTo is provided, only
    // results in that set will appear
    void GetBlocksOut(Block *Source, BlockSet &Entries,
                      BlockSet *LimitTo = nullptr) {
      for (const auto &iter : Source->BranchesOut)
        if (!LimitTo || contains(*LimitTo, iter.first))
          Entries.insert(iter.first);
    }

    // Converts/processes all branchings to a specific target
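    // ("Solipsize": branches to Target coming from blocks in From are marked
    // processed, given the new flow Type, and redirected through the Ancestor
    // shape; branches from outside From are left alone.)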
    void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
                   BlockSet &From) {
      for (auto iter = Target->BranchesIn.begin();
           iter != Target->BranchesIn.end();) {
        Block *Prior = *iter;
        if (!contains(From, Prior)) {
          iter++;
          continue;
        }
        std::unique_ptr<Branch> PriorOut;
        PriorOut.swap(Prior->BranchesOut[Target]);
        PriorOut->Ancestor = Ancestor;
        PriorOut->Type = Type;
        if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
          Multiple->Breaks++; // We are breaking out of this Multiple, so need
                              // a loop
        iter++; // carefully increment iter before erasing
        Target->BranchesIn.remove(Prior);
        Target->ProcessedBranchesIn.insert(Prior);
        Prior->ProcessedBranchesOut[Target].swap(PriorOut);
      }
    }

    Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
      SimpleShape *Simple = new SimpleShape;
      Notice(Simple);
      Simple->Inner = Inner;
      Inner->Parent = Simple;
      if (Blocks.size() > 1) {
        Blocks.remove(Inner);
        GetBlocksOut(Inner, NextEntries, &Blocks);
        BlockSet JustInner;
        JustInner.insert(Inner);
        for (const auto &iter : NextEntries)
          Solipsize(iter, Branch::Direct, Simple, JustInner);
      }
      return Simple;
    }
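
    // Create a Loop shape. The inner blocks are those that can reach the
    // entries; branches back to the entries become continues, and branches
    // out of the loop become breaks on this shape.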
    Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
                    BlockSet &NextEntries) {
      // Find the inner blocks in this loop. Proceed backwards from the
      // entries until you reach a seen block, collecting as you go.
      BlockSet InnerBlocks;
      BlockSet Queue = Entries;
      while (!Queue.empty()) {
        Block *Curr = *(Queue.begin());
        Queue.remove(*Queue.begin());
        if (!contains(InnerBlocks, Curr)) {
          // This element is new, mark it as inner and remove from outer
          InnerBlocks.insert(Curr);
          Blocks.remove(Curr);
          // Add the elements prior to it
          for (const auto &iter : Curr->BranchesIn)
            Queue.insert(iter);
        }
      }
      assert(!InnerBlocks.empty());

      for (const auto &Curr : InnerBlocks) {
        for (const auto &iter : Curr->BranchesOut) {
          Block *Possible = iter.first;
          if (!contains(InnerBlocks, Possible))
            NextEntries.insert(Possible);
        }
      }

      LoopShape *Loop = new LoopShape();
      Notice(Loop);

      // Solipsize the loop, replacing with break/continue and marking branches
      // as Processed (will not affect later calculations)
      // A. Branches to the loop entries become a continue to this shape
      for (const auto &iter : Entries)
        Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
      // B. Branches to outside the loop (a next entry) become breaks on this
      // shape
      for (const auto &iter : NextEntries)
        Solipsize(iter, Branch::Break, Loop, InnerBlocks);
      // Finish up
      Shape *Inner = Process(InnerBlocks, Entries, nullptr);
      Loop->Inner = Inner;
      return Loop;
    }

    // For each entry, find the independent group reachable by it. The
    // independent group is the entry itself, plus all the blocks it can
    // reach that cannot be directly reached by another entry. Note that we
    // ignore directly reaching the entry itself by another entry.
    // @param Ignore - previous blocks that are irrelevant
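    // For example, with entries E1 and E2 in
    //   E1 -> A, E2 -> B, A -> C, B -> C
    // the independent groups are {E1, A} and {E2, B}; C is reachable from
    // both entries, so it belongs to neither group.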
    void FindIndependentGroups(BlockSet &Entries,
                               BlockBlockSetMap &IndependentGroups,
                               BlockSet *Ignore = nullptr) {
      typedef std::map<Block *, Block *> BlockBlockMap;

      struct HelperClass {
        BlockBlockSetMap &IndependentGroups;
        BlockBlockMap Ownership; // For each block, which entry it belongs to.
                                 // We have reached it from there.
        HelperClass(BlockBlockSetMap &IndependentGroupsInit)
            : IndependentGroups(IndependentGroupsInit) {}

        void InvalidateWithChildren(Block *New) {
          // Being in the list means you need to be invalidated
          BlockList ToInvalidate;
          ToInvalidate.push_back(New);
          while (!ToInvalidate.empty()) {
            Block *Invalidatee = ToInvalidate.front();
            ToInvalidate.pop_front();
            Block *Owner = Ownership[Invalidatee];
            // Owner may have been invalidated, do not add to
            // IndependentGroups!
            if (contains(IndependentGroups, Owner))
              IndependentGroups[Owner].remove(Invalidatee);
            if (Ownership[Invalidatee]) { // may have been seen before and
                                          // invalidated already
              Ownership[Invalidatee] = nullptr;
              for (const auto &iter : Invalidatee->BranchesOut) {
                Block *Target = iter.first;
                BlockBlockMap::iterator Known = Ownership.find(Target);
                if (Known != Ownership.end()) {
                  Block *TargetOwner = Known->second;
                  if (TargetOwner)
                    ToInvalidate.push_back(Target);
                }
              }
            }
          }
        }
      };
      HelperClass Helper(IndependentGroups);

      // We flow out from each of the entries, simultaneously.
      // When we reach a new block, we add it as belonging to the one we got
      // to it from.
      // If we reach a new block that is already marked as belonging to
      // someone, it is reachable by two entries and is not valid for any of
      // them. Remove it and all it can reach that have been visited.

      // Being in the queue means we just added this item, and
      // we need to add its children
      BlockList Queue;
      for (const auto &Entry : Entries) {
        Helper.Ownership[Entry] = Entry;
        IndependentGroups[Entry].insert(Entry);
        Queue.push_back(Entry);
      }
      while (!Queue.empty()) {
        Block *Curr = Queue.front();
        Queue.pop_front();
        Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
                                               // map if we are in the queue
        if (!Owner)
          continue; // we have been invalidated meanwhile after being reached
                    // from two entries
        // Add all children
        for (const auto &iter : Curr->BranchesOut) {
          Block *New = iter.first;
          BlockBlockMap::iterator Known = Helper.Ownership.find(New);
          if (Known == Helper.Ownership.end()) {
            // New node. Add it, and put it in the queue
            Helper.Ownership[New] = Owner;
            IndependentGroups[Owner].insert(New);
            Queue.push_back(New);
            continue;
          }
          Block *NewOwner = Known->second;
          if (!NewOwner)
            continue; // We reached an invalidated node
          if (NewOwner != Owner)
            // Invalidate this and all reachable that we have seen - we reached
            // this from two locations
            Helper.InvalidateWithChildren(New);
          // otherwise, we have the same owner, so do nothing
        }
      }

      // Having processed all the interesting blocks, we remain with just one
      // potential issue:
      // If a->b, and a was invalidated, but then b was later reached by
      // someone else, we must invalidate b. To check for this, we go over all
      // elements in the independent groups, if an element has a parent which
      // does *not* have the same owner, we must remove it and all its
      // children.
      for (const auto &iter : Entries) {
        BlockSet &CurrGroup = IndependentGroups[iter];
        BlockList ToInvalidate;
        for (const auto &iter : CurrGroup) {
          Block *Child = iter;
          for (const auto &iter : Child->BranchesIn) {
            Block *Parent = iter;
            if (Ignore && contains(*Ignore, Parent))
              continue;
            if (Helper.Ownership[Parent] != Helper.Ownership[Child])
              ToInvalidate.push_back(Child);
          }
        }
        while (!ToInvalidate.empty()) {
          Block *Invalidatee = ToInvalidate.front();
          ToInvalidate.pop_front();
          Helper.InvalidateWithChildren(Invalidatee);
        }
      }

      // Remove empty groups
      for (const auto &iter : Entries)
        if (IndependentGroups[iter].empty())
          IndependentGroups.erase(iter);
    }

    Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
                        BlockBlockSetMap &IndependentGroups, Shape *Prev,
                        BlockSet &NextEntries) {
      bool Fused = isa<SimpleShape>(Prev);
      MultipleShape *Multiple = new MultipleShape();
      Notice(Multiple);
      BlockSet CurrEntries;
      for (auto &iter : IndependentGroups) {
        Block *CurrEntry = iter.first;
        BlockSet &CurrBlocks = iter.second;
        // Create inner block
        CurrEntries.clear();
        CurrEntries.insert(CurrEntry);
        for (const auto &CurrInner : CurrBlocks) {
          // Remove the block from the remaining blocks
          Blocks.remove(CurrInner);
          // Find new next entries and fix branches to them
          for (auto iter = CurrInner->BranchesOut.begin();
               iter != CurrInner->BranchesOut.end();) {
            Block *CurrTarget = iter->first;
            auto Next = iter;
            Next++;
            if (!contains(CurrBlocks, CurrTarget)) {
              NextEntries.insert(CurrTarget);
              Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
            }
            iter = Next; // increment carefully because Solipsize can remove us
          }
        }
        Multiple->InnerMap[CurrEntry->Id] =
            Process(CurrBlocks, CurrEntries, nullptr);
        // If we are not fused, then our entries will actually be checked
        if (!Fused)
          CurrEntry->IsCheckedMultipleEntry = true;
      }
      // Add entries not handled as next entries, they are deferred
      for (const auto &Entry : Entries)
        if (!contains(IndependentGroups, Entry))
          NextEntries.insert(Entry);
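      // Past RelooperMultipleSwitchThreshold entries we emit a switch on the
      // entry label instead of a long if-else chain; the switch construct
      // itself captures breaks.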
      // The multiple has been created, we can decide how to implement it
      if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
        Multiple->UseSwitch = true;
        Multiple->Breaks++; // switch captures breaks
      }
      return Multiple;
    }

    // Process a set of blocks with specified entries, returns a shape.
    // The Make* functions receive a NextEntries. If they fill it with data,
    // those are the entries for the ->Next block on them, and the blocks
    // are what remains in Blocks (which Make* modify). In this way
    // we avoid recursing on Next (imagine a long chain of Simples, if we
    // recursed we could blow the stack).
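    // For example, a straight-line chain A -> B -> C becomes
    // Simple(A) -> Simple(B) -> Simple(C) in three iterations of the loop
    // below, with NextEntries carrying {B} and then {C}, instead of through
    // recursive calls to Process.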
    Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
      BlockSet *Entries = &InitialEntries;
      BlockSet TempEntries[2];
      int CurrTempIndex = 0;
      BlockSet *NextEntries;
      Shape *Ret = nullptr;

      auto Make = [&](Shape *Temp) {
        if (Prev)
          Prev->Next = Temp; // chain onto the previous shape if one exists
        else
          Ret = Temp; // this is the first shape, and thus the result
        Prev = Temp;
        Entries = NextEntries;
      };

      while (true) {
        CurrTempIndex = 1 - CurrTempIndex;
        NextEntries = &TempEntries[CurrTempIndex];
        NextEntries->clear();

        if (Entries->empty())
          return Ret;
        if (Entries->size() == 1) {
          Block *Curr = *(Entries->begin());
          if (Curr->BranchesIn.empty()) {
            // One entry, no looping ==> Simple
            Make(MakeSimple(Blocks, Curr, *NextEntries));
            if (NextEntries->empty())
              return Ret;
            continue;
          }
          // One entry, looping ==> Loop
          Make(MakeLoop(Blocks, *Entries, *NextEntries));
          if (NextEntries->empty())
            return Ret;
          continue;
        }

        // More than one entry; try to eliminate groups of independent blocks
        // through a Multiple. It is important to remove through multiples as
        // opposed to looping since the former is more performant.
        BlockBlockSetMap IndependentGroups;
        FindIndependentGroups(*Entries, IndependentGroups);

        if (!IndependentGroups.empty()) {
          // We can handle a group in a multiple if its entry cannot be reached
          // by another entry in another group.
          // Note that it might be reachable by itself - a loop. But that is
          // fine, we will create a loop inside the multiple block (which
          // is the performant order to do it).
          for (auto iter = IndependentGroups.begin();
               iter != IndependentGroups.end();) {
            Block *Entry = iter->first;
            BlockSet &Group = iter->second;
            auto curr = iter++; // iterate carefully, we may delete
            for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
                 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
              Block *Origin = *iterBranch;
              if (!contains(Group, Origin)) {
                // Reached from outside the group, so we cannot handle this
                IndependentGroups.erase(curr);
                break;
              }
            }
          }

          // As an optimization, if we have 2 independent groups, and one is a
          // small dead end, we can handle only that dead end.
          // The other then becomes a Next - without nesting in the code and
          // recursion in the analysis.
          // TODO: if the larger is the only dead end, handle that too
          // TODO: handle >2 groups
          // TODO: handle not just dead ends, but also blocks that do not
          // branch to the NextEntries. However, must be careful there since
          // we create a Next, and that Next can prevent eliminating a break
          // (since we no longer naturally reach the same place), which may
          // necessitate a one-time loop, which makes the unnesting pointless.
          if (IndependentGroups.size() == 2) {
            // Find the smaller one
            auto iter = IndependentGroups.begin();
            Block *SmallEntry = iter->first;
            auto SmallSize = iter->second.size();
            iter++;
            Block *LargeEntry = iter->first;
            auto LargeSize = iter->second.size();
            if (SmallSize != LargeSize) { // ignore the case where they are
                                          // identical - keep things symmetrical
              if (SmallSize > LargeSize) {
                Block *Temp = SmallEntry;
                SmallEntry = LargeEntry;
                LargeEntry = Temp; // Note: we did not flip the Sizes too, they
                                   // are now invalid. TODO: use the smaller
                                   // size?
              }
              // Check if the small group is a dead end
              bool DeadEnd = true;
              BlockSet &SmallGroup = IndependentGroups[SmallEntry];
              for (const auto &Curr : SmallGroup) {
                for (const auto &iter : Curr->BranchesOut) {
                  Block *Target = iter.first;
                  if (!contains(SmallGroup, Target)) {
                    DeadEnd = false;
                    break;
                  }
                }
                if (!DeadEnd)
                  break;
              }
              if (DeadEnd)
                IndependentGroups.erase(LargeEntry);
            }
          }
        }

        if (!IndependentGroups.empty()) {
          // Some groups removable ==> Multiple
          Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
                            *NextEntries));
          if (NextEntries->empty())
            return Ret;
          continue;
        }
        // No independent groups, must be loopable ==> Loop
        Make(MakeLoop(Blocks, *Entries, *NextEntries));
        if (NextEntries->empty())
          return Ret;
      }
    }
  };

  // Main

  BlockSet AllBlocks;
  for (const auto &Curr : Pre.Live) {
    AllBlocks.insert(Curr);
  }

  BlockSet Entries;
  Entries.insert(Entry);
  Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);

  ///
  /// Relooper post-optimizer
  ///
  struct PostOptimizer {
    Relooper *Parent;
    std::stack<Shape *> LoopStack;

    PostOptimizer(Relooper *ParentInit) : Parent(ParentInit) {}

    void ShapeSwitch(Shape* var,
                     std::function<void (SimpleShape*)> simple,
                     std::function<void (MultipleShape*)> multiple,
                     std::function<void (LoopShape*)> loop) {
      switch (var->getKind()) {
        case Shape::SK_Simple: {
          simple(cast<SimpleShape>(var));
          break;
        }
        case Shape::SK_Multiple: {
          multiple(cast<MultipleShape>(var));
          break;
        }
        case Shape::SK_Loop: {
          loop(cast<LoopShape>(var));
          break;
        }
        default: llvm_unreachable("invalid shape");
      }
    }
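
    // ShapeSwitch acts as a visitor: each pass below supplies one lambda per
    // shape kind rather than repeating the switch on getKind().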

    // Find the blocks that natural control flow can get us directly to, or
    // through a multiple that we ignore
    void FollowNaturalFlow(Shape *S, BlockSet &Out) {
      if (!S) // there may be no natural flow, e.g. at the end of a chain
        return;
      ShapeSwitch(S, [&](SimpleShape* Simple) {
        Out.insert(Simple->Inner);
      }, [&](MultipleShape* Multiple) {
        for (const auto &iter : Multiple->InnerMap) {
          FollowNaturalFlow(iter.second, Out);
        }
        FollowNaturalFlow(Multiple->Next, Out);
      }, [&](LoopShape* Loop) {
        FollowNaturalFlow(Loop->Inner, Out);
      });
    }

    void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
      if (Root->Next) {
        Root->Natural = Root->Next;
        FindNaturals(Root->Next, Otherwise);
      } else {
        Root->Natural = Otherwise;
      }

      ShapeSwitch(Root, [](SimpleShape* Simple) {
      }, [&](MultipleShape* Multiple) {
        for (const auto &iter : Multiple->InnerMap) {
          FindNaturals(iter.second, Root->Natural);
        }
      }, [&](LoopShape* Loop){
        FindNaturals(Loop->Inner, Loop->Inner);
      });
    }

    // Remove unneeded breaks and continues.
    // A flow operation is trivially unneeded if the shape we naturally get to
    // by normal code execution is the same as the flow forces us to.
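    // For example, a break to a target that natural execution would fall
    // through to anyway can become a Direct branch, and the enclosing
    // Multiple's break count is then decremented.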
    void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
                             LoopShape *LastLoop = nullptr,
                             unsigned Depth = 0) {
      BlockSet NaturalBlocks;
      FollowNaturalFlow(Natural, NaturalBlocks);
      Shape *Next = Root;
      while (Next) {
        Root = Next;
        Next = nullptr;
        ShapeSwitch(
            Root,
            [&](SimpleShape* Simple) {
              if (Simple->Inner->BranchVar)
                LastLoop =
                    nullptr; // a switch clears out the loop (TODO: only for
                             // breaks, not continue)

              if (Simple->Next) {
                if (!Simple->Inner->BranchVar &&
                    Simple->Inner->ProcessedBranchesOut.size() == 2 &&
                    Depth < RelooperNestingLimit) {
                  // If there is a next block, we already know at Simple
                  // creation time to make direct branches, and we can do
                  // nothing more in general. But, we try to optimize the
                  // case of a break and a direct: This would normally be
                  //   if (break?) { break; } ..
                  // but if we make sure to nest the else, we can save the
                  // break,
                  //   if (!break?) { .. }
                  // This is also better because the more canonical nested
                  // form is easier to further optimize later. The
                  // downside is more nesting, which adds to size in builds
                  // with whitespace.
                  // Note that we avoid switches, as it complicates control
                  // flow and is not relevant for the common case we optimize
                  // here.
                  bool Found = false;
                  bool Abort = false;
                  for (const auto &iter :
                       Simple->Inner->ProcessedBranchesOut) {
                    Block *Target = iter.first;
                    Branch *Details = iter.second.get();
                    if (Details->Type == Branch::Break) {
                      Found = true;
                      if (!contains(NaturalBlocks, Target))
                        Abort = true;
                    } else if (Details->Type != Branch::Direct)
                      Abort = true;
                  }
                  if (Found && !Abort) {
                    for (const auto &iter :
                         Simple->Inner->ProcessedBranchesOut) {
                      Branch *Details = iter.second.get();
                      if (Details->Type == Branch::Break) {
                        Details->Type = Branch::Direct;
                        if (MultipleShape *Multiple =
                                dyn_cast<MultipleShape>(Details->Ancestor))
                          Multiple->Breaks--;
                      } else {
                        assert(Details->Type == Branch::Direct);
                        Details->Type = Branch::Nested;
                      }
                    }
                    Depth++; // this optimization increases depth, for us and
                             // all our next chain (i.e., until this call
                             // returns)
                  }
                }
                Next = Simple->Next;
              } else {
                // If there is no next then Natural is where we will
                // go to by doing nothing, so we can potentially optimize some
                // branches to direct.
                for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
                  Block *Target = iter.first;
                  Branch *Details = iter.second.get();
                  if (Details->Type != Branch::Direct &&
                      contains(NaturalBlocks,
                               Target)) { // note: cannot handle split blocks
                    Details->Type = Branch::Direct;
                    if (MultipleShape *Multiple =
                            dyn_cast<MultipleShape>(Details->Ancestor))
                      Multiple->Breaks--;
                  } else if (Details->Type == Branch::Break && LastLoop &&
                             LastLoop->Natural == Details->Ancestor->Natural) {
                    // it is important to simplify breaks, as simpler breaks
                    // enable other optimizations
                    Details->Labeled = false;
                    if (MultipleShape *Multiple =
                            dyn_cast<MultipleShape>(Details->Ancestor))
                      Multiple->Breaks--;
                  }
                }
              }
            },
            [&](MultipleShape* Multiple) {
              for (const auto &iter : Multiple->InnerMap) {
                RemoveUnneededFlows(iter.second, Multiple->Next,
                                    Multiple->Breaks ? nullptr : LastLoop,
                                    Depth + 1);
              }
              Next = Multiple->Next;
            },
            [&](LoopShape* Loop) {
              RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
              Next = Loop->Next;
            });
      }
    }
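
    // A break or continue targeting the innermost shape on LoopStack needs
    // no label in the output; any other target shape must be labeled.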
    // After we know which loops exist, we can calculate which need to be
    // labeled
    void FindLabeledLoops(Shape *Root) {
      Shape *Next = Root;
      while (Next) {
        Root = Next;
        Next = nullptr;
        ShapeSwitch(
            Root,
            [&](SimpleShape *Simple) {
              // Root->Next may be absent at the end of a chain
              MultipleShape *Fused =
                  dyn_cast_or_null<MultipleShape>(Root->Next);
              // If we are fusing a Multiple with a loop into this Simple, then
              // visit it now
              if (Fused && Fused->Breaks)
                LoopStack.push(Fused);
              if (Simple->Inner->BranchVar)
                LoopStack.push(nullptr); // a switch means breaks are now
                                         // useless, push a dummy
              if (Fused) {
                if (Fused->UseSwitch)
                  LoopStack.push(nullptr); // a switch means breaks are now
                                           // useless, push a dummy
                for (const auto &iter : Fused->InnerMap) {
                  FindLabeledLoops(iter.second);
                }
              }
              for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
                Branch *Details = iter.second.get();
                if (Details->Type == Branch::Break ||
                    Details->Type == Branch::Continue) {
                  assert(!LoopStack.empty());
                  if (Details->Ancestor != LoopStack.top() &&
                      Details->Labeled) {
                    if (MultipleShape *Multiple =
                            dyn_cast<MultipleShape>(Details->Ancestor)) {
                      Multiple->Labeled = true;
                    } else {
                      LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
                      Loop->Labeled = true;
                    }
                  } else {
                    Details->Labeled = false;
                  }
                }
              }
              if (Fused && Fused->UseSwitch)
                LoopStack.pop();
              if (Simple->Inner->BranchVar)
                LoopStack.pop();
              if (Fused && Fused->Breaks)
                LoopStack.pop();
              if (Fused)
                Next = Fused->Next;
              else
                Next = Root->Next;
            }
            , [&](MultipleShape* Multiple) {
              if (Multiple->Breaks)
                LoopStack.push(Multiple);
              for (const auto &iter : Multiple->InnerMap)
                FindLabeledLoops(iter.second);
              if (Multiple->Breaks)
                LoopStack.pop();
              Next = Root->Next;
            }
            , [&](LoopShape* Loop) {
              LoopStack.push(Loop);
              FindLabeledLoops(Loop->Inner);
              LoopStack.pop();
              Next = Root->Next;
            });
      }
    }

    void Process(Shape * Root) {
      FindNaturals(Root);
      RemoveUnneededFlows(Root);
      FindLabeledLoops(Root);
    }
  };

  PostOptimizer(this).Process(Root);
}

} // namespace Relooper