lib/Transforms/Utils/PromoteMemoryToRegister.cpp

   1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file promotes memory references to be register references.  It promotes
  11 // alloca instructions which only have loads and stores as uses.  An alloca is
  12 // transformed by using iterated dominator frontiers to place PHI nodes, then
  13 // traversing the function in depth-first order to rewrite loads and stores as
  14 // appropriate.
  15 //
  16 // The algorithm used here is based on:
  17 //
  18 //   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
  19 //   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
  20 //   Programming Languages
  21 //   POPL '95. ACM, New York, NY, 62-73.
  22 //
  23 // It has been modified to not explicitly use the DJ graph data structure and to
  24 // directly compute pruned SSA using per-variable liveness information.
  25 //
  26 //===----------------------------------------------------------------------===//
  27
  28 #define DEBUG_TYPE "mem2reg"
  29 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  30 #include "llvm/ADT/DenseMap.h"
  31 #include "llvm/ADT/Hashing.h"
  32 #include "llvm/ADT/STLExtras.h"
  33 #include "llvm/ADT/SmallPtrSet.h"
  34 #include "llvm/ADT/SmallVector.h"
  35 #include "llvm/ADT/Statistic.h"
  36 #include "llvm/Analysis/AliasSetTracker.h"
  37 #include "llvm/Analysis/Dominators.h"
  38 #include "llvm/Analysis/InstructionSimplify.h"
  39 #include "llvm/Analysis/ValueTracking.h"
  40 #include "llvm/DIBuilder.h"
  41 #include "llvm/DebugInfo.h"
  42 #include "llvm/IR/Constants.h"
  43 #include "llvm/IR/DerivedTypes.h"
  44 #include "llvm/IR/Function.h"
  45 #include "llvm/IR/Instructions.h"
  46 #include "llvm/IR/IntrinsicInst.h"
  47 #include "llvm/IR/Metadata.h"
  48 #include "llvm/Support/CFG.h"
  49 #include "llvm/Transforms/Utils/Local.h"
  50 #include <algorithm>
  51 #include <queue>
  52 using namespace llvm;
  53
  54 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
  55 STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
  56 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
  57 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
  58
  59 bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  60   // FIXME: If the memory unit is of pointer or integer type, we can permit
  61   // assignments to subsections of the memory unit.
  62
  63   // Only allow direct and non-volatile loads and stores...
  64   for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
  65        UI != UE; ++UI) { // Loop over all of the uses of the alloca
  66     const User *U = *UI;
  67     if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
  68       // Note that atomic loads can be transformed; atomic semantics do
  69       // not have any meaning for a local alloca.
  70       if (LI->isVolatile())
  71         return false;
  72     } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
  73       if (SI->getOperand(0) == AI)
  74         return false; // Don't allow a store OF the AI, only INTO the AI.
  75       // Note that atomic stores can be transformed; atomic semantics do
  76       // not have any meaning for a local alloca.
  77       if (SI->isVolatile())
  78         return false;
  79     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
  80       if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
  81           II->getIntrinsicID() != Intrinsic::lifetime_end)
  82         return false;
  83     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
  84       if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
  85         return false;
  86       if (!onlyUsedByLifetimeMarkers(BCI))
  87         return false;
  88     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
  89       if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
  90         return false;
  91       if (!GEPI->hasAllZeroIndices())
  92         return false;
  93       if (!onlyUsedByLifetimeMarkers(GEPI))
  94         return false;
  95     } else {
  96       return false;
  97     }
  98   }
  99
 100   return true;
 101 }
 102
 103 namespace {
 104
 105 struct AllocaInfo {
 106   SmallVector<BasicBlock *, 32> DefiningBlocks;
 107   SmallVector<BasicBlock *, 32> UsingBlocks;
 108
 109   StoreInst *OnlyStore;
 110   BasicBlock *OnlyBlock;
 111   bool OnlyUsedInOneBlock;
 112
 113   Value *AllocaPointerVal;
 114   DbgDeclareInst *DbgDeclare;
 115
 116   void clear() {
 117     DefiningBlocks.clear();
 118     UsingBlocks.clear();
 119     OnlyStore = 0;
 120     OnlyBlock = 0;
 121     OnlyUsedInOneBlock = true;
 122     AllocaPointerVal = 0;
 123     DbgDeclare = 0;
 124   }
 125
 126   /// Scan the uses of the specified alloca, filling in the AllocaInfo used
 127   /// by the rest of the pass to reason about the uses of this alloca.
 128   void AnalyzeAlloca(AllocaInst *AI) {
 129     clear();
 130
 131     // As we scan the uses of the alloca instruction, keep track of stores,
 132     // and decide whether all of the loads and stores to the alloca are within
 133     // the same basic block.
 134     for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
 135          UI != E;) {
 136       Instruction *User = cast<Instruction>(*UI++);
 137
 138       if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
 139         // Remember the basic blocks which define new values for the alloca
 140         DefiningBlocks.push_back(SI->getParent());
 141         AllocaPointerVal = SI->getOperand(0);
 142         OnlyStore = SI;
 143       } else {
 144         LoadInst *LI = cast<LoadInst>(User);
 145         // Otherwise it must be a load instruction, keep track of variable
 146         // reads.
 147         UsingBlocks.push_back(LI->getParent());
 148         AllocaPointerVal = LI;
 149       }
 150
 151       if (OnlyUsedInOneBlock) {
 152         if (OnlyBlock == 0)
 153           OnlyBlock = User->getParent();
 154         else if (OnlyBlock != User->getParent())
 155           OnlyUsedInOneBlock = false;
 156       }
 157     }
 158
 159     DbgDeclare = FindAllocaDbgDeclare(AI);
 160   }
 161 };
 162
 163 // Data package used by RenamePass()
 164 class RenamePassData {
 165 public:
 166   typedef std::vector<Value *> ValVector;
 167
 168   RenamePassData() : BB(NULL), Pred(NULL), Values() {}
 169   RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
 170       : BB(B), Pred(P), Values(V) {}
 171   BasicBlock *BB;
 172   BasicBlock *Pred;
 173   ValVector Values;
 174
 175   void swap(RenamePassData &RHS) {
 176     std::swap(BB, RHS.BB);
 177     std::swap(Pred, RHS.Pred);
 178     Values.swap(RHS.Values);
 179   }
 180 };
 181
 182 /// \brief This assigns and keeps a per-bb relative ordering of load/store
 183 /// instructions in the block that directly load or store an alloca.
 184 ///
 185 /// This functionality is important because it avoids scanning large basic
 186 /// blocks multiple times when promoting many allocas in the same block.
 187 class LargeBlockInfo {
 188   /// \brief For each instruction that we track, keep the index of the
 189   /// instruction.
 190   ///
 191   /// The index starts out as the number of the instruction from the start of
 192   /// the block.
 193   DenseMap<const Instruction *, unsigned> InstNumbers;
 194
 195 public:
 196
 197   /// This code only looks at accesses to allocas.
 198   static bool isInterestingInstruction(const Instruction *I) {
 199     return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
 200            (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
 201   }
 202
 203   /// Get or calculate the index of the specified instruction.
 204   unsigned getInstructionIndex(const Instruction *I) {
 205     assert(isInterestingInstruction(I) &&
 206            "Not a load/store to/from an alloca?");
 207
 208     // If we already have this instruction number, return it.
 209     DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
 210     if (It != InstNumbers.end())
 211       return It->second;
 212
 213     // Scan the whole block to get the instruction.  This accumulates
 214     // information for every interesting instruction in the block, in order to
 215     // avoid gratuitus rescans.
 216     const BasicBlock *BB = I->getParent();
 217     unsigned InstNo = 0;
 218     for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
 219          ++BBI)
 220       if (isInterestingInstruction(BBI))
 221         InstNumbers[BBI] = InstNo++;
 222     It = InstNumbers.find(I);
 223
 224     assert(It != InstNumbers.end() && "Didn't insert instruction?");
 225     return It->second;
 226   }
 227
 228   void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
 229
 230   void clear() { InstNumbers.clear(); }
 231 };
 232
 233 struct PromoteMem2Reg {
 234   /// The alloca instructions being promoted.
 235   std::vector<AllocaInst *> Allocas;
 236   DominatorTree &DT;
 237   DIBuilder DIB;
 238
 239   /// An AliasSetTracker object to update.  If null, don't update it.
 240   AliasSetTracker *AST;
 241
 242   /// Reverse mapping of Allocas.
 243   DenseMap<AllocaInst *, unsigned> AllocaLookup;
 244
 245   /// \brief The PhiNodes we're adding.
 246   ///
 247   /// That map is used to simplify some Phi nodes as we iterate over it, so
 248   /// it should have deterministic iterators.  We could use a MapVector, but
 249   /// since we already maintain a map from BasicBlock* to a stable numbering
 250   /// (BBNumbers), the DenseMap is more efficient (also supports removal).
 251   DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
 252
 253   /// For each PHI node, keep track of which entry in Allocas it corresponds
 254   /// to.
 255   DenseMap<PHINode *, unsigned> PhiToAllocaMap;
 256
 257   /// If we are updating an AliasSetTracker, then for each alloca that is of
 258   /// pointer type, we keep track of what to copyValue to the inserted PHI
 259   /// nodes here.
 260   std::vector<Value *> PointerAllocaValues;
 261
 262   /// For each alloca, we keep track of the dbg.declare intrinsic that
 263   /// describes it, if any, so that we can convert it to a dbg.value
 264   /// intrinsic if the alloca gets promoted.
 265   SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
 266
 267   /// The set of basic blocks the renamer has already visited.
 268   ///
 269   SmallPtrSet<BasicBlock *, 16> Visited;
 270
 271   /// Contains a stable numbering of basic blocks to avoid non-determinstic
 272   /// behavior.
 273   DenseMap<BasicBlock *, unsigned> BBNumbers;
 274
 275   /// Maps DomTreeNodes to their level in the dominator tree.
 276   DenseMap<DomTreeNode *, unsigned> DomLevels;
 277
 278   /// Lazily compute the number of predecessors a block has.
 279   DenseMap<const BasicBlock *, unsigned> BBNumPreds;
 280
 281 public:
 282   PromoteMem2Reg(const std::vector<AllocaInst *> &Allocas, DominatorTree &DT,
 283                  AliasSetTracker *AST)
 284       : Allocas(Allocas), DT(DT), DIB(*DT.getRoot()->getParent()->getParent()),
 285         AST(AST) {}
 286
 287   void run();
 288
 289 private:
 290   void RemoveFromAllocasList(unsigned &AllocaIdx) {
 291     Allocas[AllocaIdx] = Allocas.back();
 292     Allocas.pop_back();
 293     --AllocaIdx;
 294   }
 295
 296   unsigned getNumPreds(const BasicBlock *BB) {
 297     unsigned &NP = BBNumPreds[BB];
 298     if (NP == 0)
 299       NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
 300     return NP - 1;
 301   }
 302
 303   void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
 304                                AllocaInfo &Info);
 305   void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
 306                            const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
 307                            SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
 308   void RenamePass(BasicBlock *BB, BasicBlock *Pred,
 309                   RenamePassData::ValVector &IncVals,
 310                   std::vector<RenamePassData> &Worklist);
 311   bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
 312 };
 313
 314 } // end of anonymous namespace
 315
 316 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
 317   // Knowing that this alloca is promotable, we know that it's safe to kill all
 318   // instructions except for load and store.
 319
 320   for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
 321        UI != UE;) {
 322     Instruction *I = cast<Instruction>(*UI);
 323     ++UI;
 324     if (isa<LoadInst>(I) || isa<StoreInst>(I))
 325       continue;
 326
 327     if (!I->getType()->isVoidTy()) {
 328       // The only users of this bitcast/GEP instruction are lifetime intrinsics.
 329       // Follow the use/def chain to erase them now instead of leaving it for
 330       // dead code elimination later.
 331       for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
 332            UI != UE;) {
 333         Instruction *Inst = cast<Instruction>(*UI);
 334         ++UI;
 335         Inst->eraseFromParent();
 336       }
 337     }
 338     I->eraseFromParent();
 339   }
 340 }
 341
 342 /// \brief Rewrite as many loads as possible given a single store.
 343 ///
 344 /// When there is only a single store, we can use the domtree to trivially
 345 /// replace all of the dominated loads with the stored value. Do so, and return
 346 /// true if this has successfully promoted the alloca entirely. If this returns
 347 /// false there were some loads which were not dominated by the single store
 348 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 349 /// promotion algorithm in that case.
 350 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
 351                                      LargeBlockInfo &LBI,
 352                                      DominatorTree &DT,
 353                                      AliasSetTracker *AST) {
 354   StoreInst *OnlyStore = Info.OnlyStore;
 355   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
 356   BasicBlock *StoreBB = OnlyStore->getParent();
 357   int StoreIndex = -1;
 358
 359   // Clear out UsingBlocks.  We will reconstruct it here if needed.
 360   Info.UsingBlocks.clear();
 361
 362   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
 363     Instruction *UserInst = cast<Instruction>(*UI++);
 364     if (!isa<LoadInst>(UserInst)) {
 365       assert(UserInst == OnlyStore && "Should only have load/stores");
 366       continue;
 367     }
 368     LoadInst *LI = cast<LoadInst>(UserInst);
 369
 370     // Okay, if we have a load from the alloca, we want to replace it with the
 371     // only value stored to the alloca.  We can do this if the value is
 372     // dominated by the store.  If not, we use the rest of the mem2reg machinery
 373     // to insert the phi nodes as needed.
 374     if (!StoringGlobalVal) { // Non-instructions are always dominated.
 375       if (LI->getParent() == StoreBB) {
 376         // If we have a use that is in the same block as the store, compare the
 377         // indices of the two instructions to see which one came first.  If the
 378         // load came before the store, we can't handle it.
 379         if (StoreIndex == -1)
 380           StoreIndex = LBI.getInstructionIndex(OnlyStore);
 381
 382         if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
 383           // Can't handle this load, bail out.
 384           Info.UsingBlocks.push_back(StoreBB);
 385           continue;
 386         }
 387
 388       } else if (LI->getParent() != StoreBB &&
 389                  !DT.dominates(StoreBB, LI->getParent())) {
 390         // If the load and store are in different blocks, use BB dominance to
 391         // check their relationships.  If the store doesn't dom the use, bail
 392         // out.
 393         Info.UsingBlocks.push_back(LI->getParent());
 394         continue;
 395       }
 396     }
 397
 398     // Otherwise, we *can* safely rewrite this load.
 399     Value *ReplVal = OnlyStore->getOperand(0);
 400     // If the replacement value is the load, this must occur in unreachable
 401     // code.
 402     if (ReplVal == LI)
 403       ReplVal = UndefValue::get(LI->getType());
 404     LI->replaceAllUsesWith(ReplVal);
 405     if (AST && LI->getType()->isPointerTy())
 406       AST->deleteValue(LI);
 407     LI->eraseFromParent();
 408     LBI.deleteValue(LI);
 409   }
 410
 411   // Finally, after the scan, check to see if the store is all that is left.
 412   if (!Info.UsingBlocks.empty())
 413     return false; // If not, we'll have to fall back for the remainder.
 414
 415   // Record debuginfo for the store and remove the declaration's
 416   // debuginfo.
 417   if (DbgDeclareInst *DDI = Info.DbgDeclare) {
 418     DIBuilder DIB(*AI->getParent()->getParent()->getParent());
 419     ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
 420     DDI->eraseFromParent();
 421   }
 422   // Remove the (now dead) store and alloca.
 423   Info.OnlyStore->eraseFromParent();
 424   LBI.deleteValue(Info.OnlyStore);
 425
 426   if (AST)
 427     AST->deleteValue(AI);
 428   AI->eraseFromParent();
 429   LBI.deleteValue(AI);
 430   return true;
 431 }
 432
 433 namespace {
 434 /// This is a helper predicate used to search by the first element of a pair.
 435 struct StoreIndexSearchPredicate {
 436   bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
 437                   const std::pair<unsigned, StoreInst *> &RHS) {
 438     return LHS.first < RHS.first;
 439   }
 440 };
 441 }
 442
 443 /// Many allocas are only used within a single basic block.  If this is the
 444 /// case, avoid traversing the CFG and inserting a lot of potentially useless
 445 /// PHI nodes by just performing a single linear pass over the basic block
 446 /// using the Alloca.
 447 ///
 448 /// If we cannot promote this alloca (because it is read before it is written),
 449 /// return true.  This is necessary in cases where, due to control flow, the
 450 /// alloca is potentially undefined on some control flow paths.  e.g. code like
 451 /// this is potentially correct:
 452 ///
 453 ///   for (...) { if (c) { A = undef; undef = B; } }
 454 ///
 455 /// ... so long as A is not used before undef is set.
 456 static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
 457                                      LargeBlockInfo &LBI,
 458                                      AliasSetTracker *AST) {
 459   // The trickiest case to handle is when we have large blocks. Because of this,
 460   // this code is optimized assuming that large blocks happen.  This does not
 461   // significantly pessimize the small block case.  This uses LargeBlockInfo to
 462   // make it efficient to get the index of various operations in the block.
 463
 464   // Walk the use-def list of the alloca, getting the locations of all stores.
 465   typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
 466   StoresByIndexTy StoresByIndex;
 467
 468   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
 469        ++UI)
 470     if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
 471       StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
 472
 473   // Sort the stores by their index, making it efficient to do a lookup with a
 474   // binary search.
 475   std::sort(StoresByIndex.begin(), StoresByIndex.end());
 476
 477   // Walk all of the loads from this alloca, replacing them with the nearest
 478   // store above them, if any.
 479   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
 480     LoadInst *LI = dyn_cast<LoadInst>(*UI++);
 481     if (!LI)
 482       continue;
 483
 484     unsigned LoadIdx = LBI.getInstructionIndex(LI);
 485
 486     // Find the nearest store that has a lower than this load.
 487     StoresByIndexTy::iterator I = std::lower_bound(
 488         StoresByIndex.begin(), StoresByIndex.end(),
 489         std::pair<unsigned, StoreInst *>(LoadIdx, static_cast<StoreInst *>(0)),
 490         StoreIndexSearchPredicate());
 491
 492     if (I == StoresByIndex.begin())
 493       // If there is no store before this load, the load takes the undef value.
 494       LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
 495     else
 496       // Otherwise, there was a store before this load, the load takes its value.
 497       LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
 498
 499     if (AST && LI->getType()->isPointerTy())
 500       AST->deleteValue(LI);
 501     LI->eraseFromParent();
 502     LBI.deleteValue(LI);
 503   }
 504
 505   // Remove the (now dead) stores and alloca.
 506   while (!AI->use_empty()) {
 507     StoreInst *SI = cast<StoreInst>(AI->use_back());
 508     // Record debuginfo for the store before removing it.
 509     if (DbgDeclareInst *DDI = Info.DbgDeclare) {
 510       DIBuilder DIB(*AI->getParent()->getParent()->getParent());
 511       ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
 512     }
 513     SI->eraseFromParent();
 514     LBI.deleteValue(SI);
 515   }
 516
 517   if (AST)
 518     AST->deleteValue(AI);
 519   AI->eraseFromParent();
 520   LBI.deleteValue(AI);
 521
 522   // The alloca's debuginfo can be removed as well.
 523   if (DbgDeclareInst *DDI = Info.DbgDeclare)
 524     DDI->eraseFromParent();
 525
 526   ++NumLocalPromoted;
 527 }
 528
 529 void PromoteMem2Reg::run() {
 530   Function &F = *DT.getRoot()->getParent();
 531
 532   if (AST)
 533     PointerAllocaValues.resize(Allocas.size());
 534   AllocaDbgDeclares.resize(Allocas.size());
 535
 536   AllocaInfo Info;
 537   LargeBlockInfo LBI;
 538
 539   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
 540     AllocaInst *AI = Allocas[AllocaNum];
 541
 542     assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
 543     assert(AI->getParent()->getParent() == &F &&
 544            "All allocas should be in the same function, which is same as DF!");
 545
 546     removeLifetimeIntrinsicUsers(AI);
 547
 548     if (AI->use_empty()) {
 549       // If there are no uses of the alloca, just delete it now.
 550       if (AST)
 551         AST->deleteValue(AI);
 552       AI->eraseFromParent();
 553
 554       // Remove the alloca from the Allocas list, since it has been processed
 555       RemoveFromAllocasList(AllocaNum);
 556       ++NumDeadAlloca;
 557       continue;
 558     }
 559
 560     // Calculate the set of read and write-locations for each alloca.  This is
 561     // analogous to finding the 'uses' and 'definitions' of each variable.
 562     Info.AnalyzeAlloca(AI);
 563
 564     // If there is only a single store to this value, replace any loads of
 565     // it that are directly dominated by the definition with the value stored.
 566     if (Info.DefiningBlocks.size() == 1) {
 567       if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
 568         // The alloca has been processed, move on.
 569         RemoveFromAllocasList(AllocaNum);
 570         ++NumSingleStore;
 571         continue;
 572       }
 573     }
 574
 575     // If the alloca is only read and written in one basic block, just perform a
 576     // linear sweep over the block to eliminate it.
 577     if (Info.OnlyUsedInOneBlock) {
 578       promoteSingleBlockAlloca(AI, Info, LBI, AST);
 579
 580       // The alloca has been processed, move on.
 581       RemoveFromAllocasList(AllocaNum);
 582       continue;
 583     }
 584
 585     // If we haven't computed dominator tree levels, do so now.
 586     if (DomLevels.empty()) {
 587       SmallVector<DomTreeNode *, 32> Worklist;
 588
 589       DomTreeNode *Root = DT.getRootNode();
 590       DomLevels[Root] = 0;
 591       Worklist.push_back(Root);
 592
 593       while (!Worklist.empty()) {
 594         DomTreeNode *Node = Worklist.pop_back_val();
 595         unsigned ChildLevel = DomLevels[Node] + 1;
 596         for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
 597              CI != CE; ++CI) {
 598           DomLevels[*CI] = ChildLevel;
 599           Worklist.push_back(*CI);
 600         }
 601       }
 602     }
 603
 604     // If we haven't computed a numbering for the BB's in the function, do so
 605     // now.
 606     if (BBNumbers.empty()) {
 607       unsigned ID = 0;
 608       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
 609         BBNumbers[I] = ID++;
 610     }
 611
 612     // If we have an AST to keep updated, remember some pointer value that is
 613     // stored into the alloca.
 614     if (AST)
 615       PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
 616
 617     // Remember the dbg.declare intrinsic describing this alloca, if any.
 618     if (Info.DbgDeclare)
 619       AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
 620
 621     // Keep the reverse mapping of the 'Allocas' array for the rename pass.
 622     AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
 623
 624     // At this point, we're committed to promoting the alloca using IDF's, and
 625     // the standard SSA construction algorithm.  Determine which blocks need PHI
 626     // nodes and see if we can optimize out some work by avoiding insertion of
 627     // dead phi nodes.
 628     DetermineInsertionPoint(AI, AllocaNum, Info);
 629   }
 630
 631   if (Allocas.empty())
 632     return; // All of the allocas must have been trivial!
 633
 634   LBI.clear();
 635
 636   // Set the incoming values for the basic block to be null values for all of
 637   // the alloca's.  We do this in case there is a load of a value that has not
 638   // been stored yet.  In this case, it will get this null value.
 639   //
 640   RenamePassData::ValVector Values(Allocas.size());
 641   for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
 642     Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
 643
 644   // Walks all basic blocks in the function performing the SSA rename algorithm
 645   // and inserting the phi nodes we marked as necessary
 646   //
 647   std::vector<RenamePassData> RenamePassWorkList;
 648   RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
 649   do {
 650     RenamePassData RPD;
 651     RPD.swap(RenamePassWorkList.back());
 652     RenamePassWorkList.pop_back();
 653     // RenamePass may add new worklist entries.
 654     RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
 655   } while (!RenamePassWorkList.empty());
 656
 657   // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
 658   Visited.clear();
 659
 660   // Remove the allocas themselves from the function.
 661   for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
 662     Instruction *A = Allocas[i];
 663
 664     // If there are any uses of the alloca instructions left, they must be in
 665     // unreachable basic blocks that were not processed by walking the dominator
 666     // tree. Just delete the users now.
 667     if (!A->use_empty())
 668       A->replaceAllUsesWith(UndefValue::get(A->getType()));
 669     if (AST)
 670       AST->deleteValue(A);
 671     A->eraseFromParent();
 672   }
 673
 674   // Remove alloca's dbg.declare instrinsics from the function.
 675   for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
 676     if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
 677       DDI->eraseFromParent();
 678
 679   // Loop over all of the PHI nodes and see if there are any that we can get
 680   // rid of because they merge all of the same incoming values.  This can
 681   // happen due to undef values coming into the PHI nodes.  This process is
 682   // iterative, because eliminating one PHI node can cause others to be removed.
 683   bool EliminatedAPHI = true;
 684   while (EliminatedAPHI) {
 685     EliminatedAPHI = false;
 686
 687     // Iterating over NewPhiNodes is deterministic, so it is safe to try to
 688     // simplify and RAUW them as we go.  If it was not, we could add uses to
 689     // the values we replace with in a non deterministic order, thus creating
 690     // non deterministic def->use chains.
 691     for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
 692              I = NewPhiNodes.begin(),
 693              E = NewPhiNodes.end();
 694          I != E;) {
 695       PHINode *PN = I->second;
 696
 697       // If this PHI node merges one value and/or undefs, get the value.
 698       if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
 699         if (AST && PN->getType()->isPointerTy())
 700           AST->deleteValue(PN);
 701         PN->replaceAllUsesWith(V);
 702         PN->eraseFromParent();
 703         NewPhiNodes.erase(I++);
 704         EliminatedAPHI = true;
 705         continue;
 706       }
 707       ++I;
 708     }
 709   }
 710
 711   // At this point, the renamer has added entries to PHI nodes for all reachable
 712   // code.  Unfortunately, there may be unreachable blocks which the renamer
 713   // hasn't traversed.  If this is the case, the PHI nodes may not
 714   // have incoming values for all predecessors.  Loop over all PHI nodes we have
 715   // created, inserting undef values if they are missing any incoming values.
 716   //
 717   for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
 718            I = NewPhiNodes.begin(),
 719            E = NewPhiNodes.end();
 720        I != E; ++I) {
 721     // We want to do this once per basic block.  As such, only process a block
 722     // when we find the PHI that is the first entry in the block.
 723     PHINode *SomePHI = I->second;
 724     BasicBlock *BB = SomePHI->getParent();
 725     if (&BB->front() != SomePHI)
 726       continue;
 727
 728     // Only do work here if there the PHI nodes are missing incoming values.  We
 729     // know that all PHI nodes that were inserted in a block will have the same
 730     // number of incoming values, so we can just check any of them.
 731     if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
 732       continue;
 733
 734     // Get the preds for BB.
 735     SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
 736
 737     // Ok, now we know that all of the PHI nodes are missing entries for some
 738     // basic blocks.  Start by sorting the incoming predecessors for efficient
 739     // access.
 740     std::sort(Preds.begin(), Preds.end());
 741
 742     // Now we loop through all BB's which have entries in SomePHI and remove
 743     // them from the Preds list.
 744     for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
 745       // Do a log(n) search of the Preds list for the entry we want.
 746       SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
 747           Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
 748       assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
 749              "PHI node has entry for a block which is not a predecessor!");
 750
 751       // Remove the entry
 752       Preds.erase(EntIt);
 753     }
 754
 755     // At this point, the blocks left in the preds list must have dummy
 756     // entries inserted into every PHI nodes for the block.  Update all the phi
 757     // nodes in this block that we are inserting (there could be phis before
 758     // mem2reg runs).
 759     unsigned NumBadPreds = SomePHI->getNumIncomingValues();
 760     BasicBlock::iterator BBI = BB->begin();
 761     while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
 762            SomePHI->getNumIncomingValues() == NumBadPreds) {
 763       Value *UndefVal = UndefValue::get(SomePHI->getType());
 764       for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
 765         SomePHI->addIncoming(UndefVal, Preds[pred]);
 766     }
 767   }
 768
 769   NewPhiNodes.clear();
 770 }
 771
 772 /// \brief Determine which blocks the value is live in.
 773 ///
 774 /// These are blocks which lead to uses.  Knowing this allows us to avoid
 775 /// inserting PHI nodes into blocks which don't lead to uses (thus, the
 776 /// inserted phi nodes would be dead).
 777 void PromoteMem2Reg::ComputeLiveInBlocks(
 778     AllocaInst *AI, AllocaInfo &Info,
 779     const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
 780     SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
 781
 782   // To determine liveness, we must iterate through the predecessors of blocks
 783   // where the def is live.  Blocks are added to the worklist if we need to
 784   // check their predecessors.  Start with all the using blocks.
 785   SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
 786                                                     Info.UsingBlocks.end());
 787
 788   // If any of the using blocks is also a definition block, check to see if the
 789   // definition occurs before or after the use.  If it happens before the use,
 790   // the value isn't really live-in.
 791   for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
 792     BasicBlock *BB = LiveInBlockWorklist[i];
 793     if (!DefBlocks.count(BB))
 794       continue;
 795
 796     // Okay, this is a block that both uses and defines the value.  If the first
 797     // reference to the alloca is a def (store), then we know it isn't live-in.
 798     for (BasicBlock::iterator I = BB->begin();; ++I) {
 799       if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
 800         if (SI->getOperand(1) != AI)
 801           continue;
 802
 803         // We found a store to the alloca before a load.  The alloca is not
 804         // actually live-in here.
 805         LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
 806         LiveInBlockWorklist.pop_back();
 807         --i, --e;
 808         break;
 809       }
 810
 811       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
 812         if (LI->getOperand(0) != AI)
 813           continue;
 814
 815         // Okay, we found a load before a store to the alloca.  It is actually
 816         // live into this block.
 817         break;
 818       }
 819     }
 820   }
 821
 822   // Now that we have a set of blocks where the phi is live-in, recursively add
 823   // their predecessors until we find the full region the value is live.
 824   while (!LiveInBlockWorklist.empty()) {
 825     BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
 826
 827     // The block really is live in here, insert it into the set.  If already in
 828     // the set, then it has already been processed.
 829     if (!LiveInBlocks.insert(BB))
 830       continue;
 831
 832     // Since the value is live into BB, it is either defined in a predecessor or
 833     // live into it to.  Add the preds to the worklist unless they are a
 834     // defining block.
 835     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
 836       BasicBlock *P = *PI;
 837
 838       // The value is not live into a predecessor if it defines the value.
 839       if (DefBlocks.count(P))
 840         continue;
 841
 842       // Otherwise it is, add to the worklist.
 843       LiveInBlockWorklist.push_back(P);
 844     }
 845   }
 846 }
 847
 848 namespace {
 849 typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
 850
 851 struct DomTreeNodeCompare {
 852   bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
 853     return LHS.second < RHS.second;
 854   }
 855 };
 856 } // end anonymous namespace
 857
 858 /// At this point, we're committed to promoting the alloca using IDF's, and the
 859 /// standard SSA construction algorithm.  Determine which blocks need phi nodes
 860 /// and see if we can optimize out some work by avoiding insertion of dead phi
 861 /// nodes.
 862 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
 863                                              AllocaInfo &Info) {
 864   // Unique the set of defining blocks for efficient lookup.
 865   SmallPtrSet<BasicBlock *, 32> DefBlocks;
 866   DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
 867
 868   // Determine which blocks the value is live in.  These are blocks which lead
 869   // to uses.
 870   SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
 871   ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
 872
 873   // Use a priority queue keyed on dominator tree level so that inserted nodes
 874   // are handled from the bottom of the dominator tree upwards.
 875   typedef std::priority_queue<DomTreeNodePair,
 876                               SmallVector<DomTreeNodePair, 32>,
 877                               DomTreeNodeCompare> IDFPriorityQueue;
 878   IDFPriorityQueue PQ;
 879
 880   for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
 881                                                      E = DefBlocks.end();
 882        I != E; ++I) {
 883     if (DomTreeNode *Node = DT.getNode(*I))
 884       PQ.push(std::make_pair(Node, DomLevels[Node]));
 885   }
 886
 887   SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
 888   SmallPtrSet<DomTreeNode *, 32> Visited;
 889   SmallVector<DomTreeNode *, 32> Worklist;
 890   while (!PQ.empty()) {
 891     DomTreeNodePair RootPair = PQ.top();
 892     PQ.pop();
 893     DomTreeNode *Root = RootPair.first;
 894     unsigned RootLevel = RootPair.second;
 895
 896     // Walk all dominator tree children of Root, inspecting their CFG edges with
 897     // targets elsewhere on the dominator tree. Only targets whose level is at
 898     // most Root's level are added to the iterated dominance frontier of the
 899     // definition set.
 900
 901     Worklist.clear();
 902     Worklist.push_back(Root);
 903
 904     while (!Worklist.empty()) {
 905       DomTreeNode *Node = Worklist.pop_back_val();
 906       BasicBlock *BB = Node->getBlock();
 907
 908       for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
 909            ++SI) {
 910         DomTreeNode *SuccNode = DT.getNode(*SI);
 911
 912         // Quickly skip all CFG edges that are also dominator tree edges instead
 913         // of catching them below.
 914         if (SuccNode->getIDom() == Node)
 915           continue;
 916
 917         unsigned SuccLevel = DomLevels[SuccNode];
 918         if (SuccLevel > RootLevel)
 919           continue;
 920
 921         if (!Visited.insert(SuccNode))
 922           continue;
 923
 924         BasicBlock *SuccBB = SuccNode->getBlock();
 925         if (!LiveInBlocks.count(SuccBB))
 926           continue;
 927
 928         DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
 929         if (!DefBlocks.count(SuccBB))
 930           PQ.push(std::make_pair(SuccNode, SuccLevel));
 931       }
 932
 933       for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
 934            ++CI) {
 935         if (!Visited.count(*CI))
 936           Worklist.push_back(*CI);
 937       }
 938     }
 939   }
 940
 941   if (DFBlocks.size() > 1)
 942     std::sort(DFBlocks.begin(), DFBlocks.end());
 943
 944   unsigned CurrentVersion = 0;
 945   for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
 946     QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
 947 }
 948
 949 /// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
 950 ///
 951 /// Returns true if there wasn't already a phi-node for that variable
 952 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
 953                                   unsigned &Version) {
 954   // Look up the basic-block in question.
 955   PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
 956
 957   // If the BB already has a phi node added for the i'th alloca then we're done!
 958   if (PN)
 959     return false;
 960
 961   // Create a PhiNode using the dereferenced type... and add the phi-node to the
 962   // BasicBlock.
 963   PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
 964                        Allocas[AllocaNo]->getName() + "." + Twine(Version++),
 965                        BB->begin());
 966   ++NumPHIInsert;
 967   PhiToAllocaMap[PN] = AllocaNo;
 968
 969   if (AST && PN->getType()->isPointerTy())
 970     AST->copyValue(PointerAllocaValues[AllocaNo], PN);
 971
 972   return true;
 973 }
 974
 975 /// \brief Recursively traverse the CFG of the function, renaming loads and
 976 /// stores to the allocas which we are promoting.
 977 ///
 978 /// IncomingVals indicates what value each Alloca contains on exit from the
 979 /// predecessor block Pred.
 980 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
 981                                 RenamePassData::ValVector &IncomingVals,
 982                                 std::vector<RenamePassData> &Worklist) {
 983 NextIteration:
 984   // If we are inserting any phi nodes into this BB, they will already be in the
 985   // block.
 986   if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
 987     // If we have PHI nodes to update, compute the number of edges from Pred to
 988     // BB.
 989     if (PhiToAllocaMap.count(APN)) {
 990       // We want to be able to distinguish between PHI nodes being inserted by
 991       // this invocation of mem2reg from those phi nodes that already existed in
 992       // the IR before mem2reg was run.  We determine that APN is being inserted
 993       // because it is missing incoming edges.  All other PHI nodes being
 994       // inserted by this pass of mem2reg will have the same number of incoming
 995       // operands so far.  Remember this count.
 996       unsigned NewPHINumOperands = APN->getNumOperands();
 997
 998       unsigned NumEdges = 0;
 999       for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
1000         if (*I == BB)
1001           ++NumEdges;
1002       assert(NumEdges && "Must be at least one edge from Pred to BB!");
1003
1004       // Add entries for all the phis.
1005       BasicBlock::iterator PNI = BB->begin();
1006       do {
1007         unsigned AllocaNo = PhiToAllocaMap[APN];
1008
1009         // Add N incoming values to the PHI node.
1010         for (unsigned i = 0; i != NumEdges; ++i)
1011           APN->addIncoming(IncomingVals[AllocaNo], Pred);
1012
1013         // The currently active variable for this block is now the PHI.
1014         IncomingVals[AllocaNo] = APN;
1015
1016         // Get the next phi node.
1017         ++PNI;
1018         APN = dyn_cast<PHINode>(PNI);
1019         if (APN == 0)
1020           break;
1021
1022         // Verify that it is missing entries.  If not, it is not being inserted
1023         // by this mem2reg invocation so we want to ignore it.
1024       } while (APN->getNumOperands() == NewPHINumOperands);
1025     }
1026   }
1027
1028   // Don't revisit blocks.
1029   if (!Visited.insert(BB))
1030     return;
1031
1032   for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
1033     Instruction *I = II++; // get the instruction, increment iterator
1034
1035     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1036       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
1037       if (!Src)
1038         continue;
1039
1040       DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
1041       if (AI == AllocaLookup.end())
1042         continue;
1043
1044       Value *V = IncomingVals[AI->second];
1045
1046       // Anything using the load now uses the current value.
1047       LI->replaceAllUsesWith(V);
1048       if (AST && LI->getType()->isPointerTy())
1049         AST->deleteValue(LI);
1050       BB->getInstList().erase(LI);
1051     } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1052       // Delete this instruction and mark the name as the current holder of the
1053       // value
1054       AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
1055       if (!Dest)
1056         continue;
1057
1058       DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
1059       if (ai == AllocaLookup.end())
1060         continue;
1061
1062       // what value were we writing?
1063       IncomingVals[ai->second] = SI->getOperand(0);
1064       // Record debuginfo for the store before removing it.
1065       if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
1066         ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
1067       BB->getInstList().erase(SI);
1068     }
1069   }
1070
1071   // 'Recurse' to our successors.
1072   succ_iterator I = succ_begin(BB), E = succ_end(BB);
1073   if (I == E)
1074     return;
1075
1076   // Keep track of the successors so we don't visit the same successor twice
1077   SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
1078
1079   // Handle the first successor without using the worklist.
1080   VisitedSuccs.insert(*I);
1081   Pred = BB;
1082   BB = *I;
1083   ++I;
1084
1085   for (; I != E; ++I)
1086     if (VisitedSuccs.insert(*I))
1087       Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
1088
1089   goto NextIteration;
1090 }
1091
1092 void llvm::PromoteMemToReg(const std::vector<AllocaInst *> &Allocas,
1093                            DominatorTree &DT, AliasSetTracker *AST) {
1094   // If there is nothing to do, bail out...
1095   if (Allocas.empty())
1096     return;
1097
1098   PromoteMem2Reg(Allocas, DT, AST).run();
1099 }