X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FPromoteMemoryToRegister.cpp;h=e5a00f4e97744c61b59e721fd07a75e5cf2204b9;hb=ee04a6d3a40c3017124e3fd89a0db473a2824498;hp=f895d1955c25ab2a2daa7a4f71d1b3e072775854;hpb=bbe104002fe7398bbde5d5ec2c2e5e324cf72305;p=oota-llvm.git diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index f895d1955c2..e5a00f4e977 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -2,17 +2,26 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by the LLVM research group and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file promote memory references to be register references. It promotes +// This file promotes memory references to be register references. It promotes // alloca instructions which only have loads and stores as uses. An alloca is -// transformed by using dominator frontiers to place PHI nodes, then traversing -// the function in depth-first order to rewrite loads and stores as appropriate. -// This is just the standard SSA construction algorithm to construct "pruned" -// SSA form. +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. // //===----------------------------------------------------------------------===// @@ -22,36 +31,46 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include +#include using namespace llvm; STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -// Provide DenseMapKeyInfo for all pointers. namespace llvm { template<> -struct DenseMapKeyInfo > { - static inline std::pair getEmptyKey() { - return std::make_pair((BasicBlock*)-1, ~0U); +struct DenseMapInfo > { + typedef std::pair EltTy; + static inline EltTy getEmptyKey() { + return EltTy(reinterpret_cast(-1), ~0U); } - static inline std::pair getTombstoneKey() { - return std::make_pair((BasicBlock*)-2, 0U); + static inline EltTy getTombstoneKey() { + return EltTy(reinterpret_cast(-2), 0U); } static unsigned getHashValue(const std::pair &Val) { - return DenseMapKeyInfo::getHashValue(Val.first) + Val.second*2; + return DenseMapInfo::getHashValue(Val.first) + Val.second*2; + } + static bool isEqual(const EltTy &LHS, const EltTy &RHS) { + return LHS == RHS; } - static bool isPod() { return true; } }; } @@ -62,29 +81,51 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. - // Only allow direct loads and stores... - for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) // Loop over all of the uses of the alloca - if (isa(*UI)) { - // noop - } else if (const StoreInst *SI = dyn_cast(*UI)) { + // Only allow direct and non-volatile loads and stores... + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast(U)) { + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; } else { - return false; // Not a load or store. + return false; } + } return true; } namespace { + struct AllocaInfo; // Data package used by RenamePass() - class VISIBILITY_HIDDEN RenamePassData { + class RenamePassData { public: typedef std::vector ValVector; - RenamePassData() {} + RenamePassData() : BB(NULL), Pred(NULL), Values() {} RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) : BB(B), Pred(P), Values(V) {} BasicBlock *BB; @@ -97,22 +138,73 @@ namespace { Values.swap(RHS.Values); } }; + + /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of + /// load/store instructions in the block that directly load or store an alloca. + /// + /// This functionality is important because it avoids scanning large basic + /// blocks multiple times when promoting many allocas in the same block. + class LargeBlockInfo { + /// InstNumbers - For each instruction that we track, keep the index of the + /// instruction. The index starts out as the number of the instruction from + /// the start of the block. + DenseMap InstNumbers; + public: + + /// isInterestingInstruction - This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa(I) && isa(I->getOperand(0))) || + (isa(I) && isa(I->getOperand(1))); + } + + /// getInstructionIndex - Get or calculate the index of the specified + /// instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) return It->second; + + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); + BBI != E; ++BBI) + if (isInterestingInstruction(BBI)) + InstNumbers[BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } + + void deleteValue(const Instruction *I) { + InstNumbers.erase(I); + } + + void clear() { + InstNumbers.clear(); + } + }; - struct VISIBILITY_HIDDEN PromoteMem2Reg { + struct PromoteMem2Reg { /// Allocas - The alloca instructions being promoted. /// std::vector Allocas; - SmallVector &RetryList; DominatorTree &DT; - DominanceFrontier &DF; + DIBuilder *DIB; /// AST - An AliasSetTracker object to update. If null, don't update it. /// AliasSetTracker *AST; - + /// AllocaLookup - Reverse mapping of Allocas. /// - std::map AllocaLookup; + DenseMap AllocaLookup; /// NewPhiNodes - The PhiNodes we're adding. /// @@ -128,6 +220,11 @@ namespace { /// std::vector PointerAllocaValues; + /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare + /// intrinsic that describes it, if any, so that we can convert it to a + /// dbg.value intrinsic if the alloca gets promoted. + SmallVector AllocaDbgDeclares; + /// Visited - The set of basic blocks the renamer has already visited. /// SmallPtrSet Visited; @@ -136,22 +233,21 @@ namespace { /// non-determinstic behavior. DenseMap BBNumbers; + /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. + DenseMap DomLevels; + + /// BBNumPreds - Lazily compute the number of predecessors a block has. + DenseMap BBNumPreds; public: - PromoteMem2Reg(const std::vector &A, - SmallVector &Retry, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), RetryList(Retry), DT(dt), DF(df), AST(ast) {} + PromoteMem2Reg(const std::vector &A, DominatorTree &dt, + AliasSetTracker *ast) + : Allocas(A), DT(dt), DIB(0), AST(ast) {} + ~PromoteMem2Reg() { + delete DIB; + } void run(); - /// properlyDominates - Return true if I1 properly dominates I2. - /// - bool properlyDominates(Instruction *I1, Instruction *I2) const { - if (InvokeInst *II = dyn_cast(I1)) - I1 = II->getNormalDest()->begin(); - return DT.properlyDominates(I1->getParent(), I2->getParent()); - } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. /// bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { @@ -164,29 +260,41 @@ namespace { Allocas.pop_back(); --AllocaIdx; } - - void MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum, - SmallPtrSet &DeadPHINodes); - bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI); - void PromoteLocallyUsedAllocas(BasicBlock *BB, - const std::vector &AIs); + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB))+1; + return NP-1; + } + + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet &DefBlocks, + SmallPtrSet &LiveInBlocks); + + void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, - SmallPtrSet &InsertedPHINodes); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; struct AllocaInfo { - std::vector DefiningBlocks; - std::vector UsingBlocks; + SmallVector DefiningBlocks; + SmallVector UsingBlocks; StoreInst *OnlyStore; BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; void clear() { DefiningBlocks.clear(); @@ -195,19 +303,21 @@ namespace { OnlyBlock = 0; OnlyUsedInOneBlock = true; AllocaPointerVal = 0; + DbgDeclare = 0; } /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our /// ivars. void AnalyzeAlloca(AllocaInst *AI) { clear(); - + // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); - U != E; ++U){ - Instruction *User = cast(*U); + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast(*UI++); + if (StoreInst *SI = dyn_cast(User)) { // Remember the basic blocks which define new values for the alloca DefiningBlocks.push_back(SI->getParent()); @@ -215,7 +325,8 @@ namespace { OnlyStore = SI; } else { LoadInst *LI = cast(User); - // Otherwise it must be a load instruction, keep track of variable reads + // Otherwise it must be a load instruction, keep track of variable + // reads. UsingBlocks.push_back(LI->getParent()); AllocaPointerVal = LI; } @@ -227,24 +338,54 @@ namespace { OnlyUsedInOneBlock = false; } } + + DbgDeclare = FindAllocaDbgDeclare(AI); } }; + typedef std::pair DomTreeNodePair; + + struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < RHS.second; + } + }; } // end of anonymous namespace -void PromoteMem2Reg::run() { - Function &F = *DF.getRoot()->getParent(); +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. - // LocallyUsedAllocas - Keep track of all of the alloca instructions which are - // only used in a single basic block. These instructions can be efficiently - // promoted by performing a single linear scan over that one block. Since - // individual basic blocks are sometimes large, we group together all allocas - // that are live in a single basic block by the basic block they are live in. - std::map > LocallyUsedAllocas; + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast(*UI); + ++UI; + if (isa(I) || isa(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast(*UI); + ++UI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; + LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -254,6 +395,8 @@ void PromoteMem2Reg::run() { assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); + removeLifetimeIntrinsicUsers(AI); + if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); @@ -269,71 +412,92 @@ void PromoteMem2Reg::run() { // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); - // If the alloca is only read and written in one basic block, just perform a - // linear sweep over the block to eliminate it. - if (Info.OnlyUsedInOneBlock) { - LocallyUsedAllocas[Info.OnlyBlock].push_back(AI); - - // Remove the alloca from the Allocas list, since it will be processed. - RemoveFromAllocasList(AllocaNum); - continue; - } - // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - // Be aware of loads before the store. - std::set ProcessedBlocks; - for (unsigned i = 0, e = Info.UsingBlocks.size(); i != e; ++i) - // If the store dominates the block and if we haven't processed it yet, - // do so now. - if (dominates(Info.OnlyStore->getParent(), Info.UsingBlocks[i])) - if (ProcessedBlocks.insert(Info.UsingBlocks[i]).second) { - BasicBlock *UseBlock = Info.UsingBlocks[i]; - - // If the use and store are in the same block, do a quick scan to - // verify that there are no uses before the store. - if (UseBlock == Info.OnlyStore->getParent()) { - BasicBlock::iterator I = UseBlock->begin(); - for (; &*I != Info.OnlyStore; ++I) { // scan block for store. - if (isa(I) && I->getOperand(0) == AI) - break; - } - if (&*I != Info.OnlyStore) break; // Do not handle this case. - } - - // Otherwise, if this is a different block or if all uses happen - // after the store, do a simple linear scan to replace loads with - // the stored value. - for (BasicBlock::iterator I = UseBlock->begin(),E = UseBlock->end(); - I != E; ) { - if (LoadInst *LI = dyn_cast(I++)) { - if (LI->getOperand(0) == AI) { - LI->replaceAllUsesWith(Info.OnlyStore->getOperand(0)); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - LI->eraseFromParent(); - } - } - } - - // Finally, remove this block from the UsingBlock set. - Info.UsingBlocks[i] = Info.UsingBlocks.back(); - --i; --e; - } + RewriteSingleStoreAlloca(AI, Info, LBI); // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { - ++NumSingleStore; + // Record debuginfo for the store and remove the declaration's debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); + DDI->eraseFromParent(); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); + + ++NumSingleStore; continue; } } - - if (AST) - PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock) { + PromoteSingleBlockAlloca(AI, Info, LBI); + + // Finally, after the scan, check to see if the stores are all that is + // left. + if (Info.UsingBlocks.empty()) { + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); + + ++NumLocalPromoted; + continue; + } + } + + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + SmallVector Worklist; + + DomTreeNode *Root = DT.getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); + CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } + } // If we haven't computed a numbering for the BB's in the function, do so // now. @@ -343,94 +507,30 @@ void PromoteMem2Reg::run() { BBNumbers[I] = ID++; } - // Compute the locations where PhiNodes need to be inserted. Look at the - // dominance frontier of EACH basic-block we have a write in. - // - unsigned CurrentVersion = 0; - SmallPtrSet InsertedPHINodes; - std::vector > DFBlocks; - while (!Info.DefiningBlocks.empty()) { - BasicBlock *BB = Info.DefiningBlocks.back(); - Info.DefiningBlocks.pop_back(); - - // Look up the DF for this write, add it to PhiNodes - DominanceFrontier::const_iterator it = DF.find(BB); - if (it != DF.end()) { - const DominanceFrontier::DomSetType &S = it->second; - - // In theory we don't need the indirection through the DFBlocks vector. - // In practice, the order of calling QueuePhiNode would depend on the - // (unspecified) ordering of basic blocks in the dominance frontier, - // which would give PHI nodes non-determinstic subscripts. Fix this by - // processing blocks in order of the occurance in the function. - for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), - PE = S.end(); P != PE; ++P) - DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); - - // Sort by which the block ordering in the function. - std::sort(DFBlocks.begin(), DFBlocks.end()); - - for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { - BasicBlock *BB = DFBlocks[i].second; - if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) - Info.DefiningBlocks.push_back(BB); - } - DFBlocks.clear(); - } - } - - // Now that we have inserted PHI nodes along the Iterated Dominance Frontier - // of the writes to the variable, scan through the reads of the variable, - // marking PHI nodes which are actually necessary as alive (by removing them - // from the InsertedPHINodes set). This is not perfect: there may PHI - // marked alive because of loads which are dominated by stores, but there - // will be no unmarked PHI nodes which are actually used. - // - for (unsigned i = 0, e = Info.UsingBlocks.size(); i != e; ++i) - MarkDominatingPHILive(Info.UsingBlocks[i], AllocaNum, InsertedPHINodes); - Info.UsingBlocks.clear(); - - // If there are any PHI nodes which are now known to be dead, remove them! - for (SmallPtrSet::iterator I = InsertedPHINodes.begin(), - E = InsertedPHINodes.end(); I != E; ++I) { - PHINode *PN = *I; - bool Erased=NewPhiNodes.erase(std::make_pair(PN->getParent(), AllocaNum)); - Erased=Erased; - assert(Erased && "PHI already removed?"); + // If we have an AST to keep updated, remember some pointer value that is + // stored into the alloca. + if (AST) + PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; - if (AST && isa(PN->getType())) - AST->deleteValue(PN); - PN->eraseFromParent(); - PhiToAllocaMap.erase(PN); - } - - // Keep the reverse mapping of the 'Allocas' array. + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + + // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; - } - // Process all allocas which are only used in a single basic block. - for (std::map >::iterator I = - LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){ - const std::vector &LocAllocas = I->second; - assert(!LocAllocas.empty() && "empty alloca list??"); - - // It's common for there to only be one alloca in the list. Handle it - // efficiently. - if (LocAllocas.size() == 1) { - // If we can do the quick promotion pass, do so now. - if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0])) - RetryList.push_back(LocAllocas[0]); // Failed, retry later. - } else { - // Locally promote anything possible. Note that if this is unable to - // promote a particular alloca, it puts the alloca onto the Allocas vector - // for global processing. - PromoteLocallyUsedAllocas(I->first, LocAllocas); - } + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need PHI + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + DetermineInsertionPoint(AI, AllocaNum, Info); } if (Allocas.empty()) return; // All of the allocas must have been trivial! + LBI.clear(); + + // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. @@ -444,13 +544,13 @@ void PromoteMem2Reg::run() { // std::vector RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); - while (!RenamePassWorkList.empty()) { + do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); - } + } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); @@ -460,16 +560,19 @@ void PromoteMem2Reg::run() { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in - // sections of dead code that were not processed on the dominance frontier. - // Just delete the users now. - // + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } - + // Remove alloca's dbg.declare instrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is @@ -481,19 +584,16 @@ void PromoteMem2Reg::run() { for (DenseMap, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; - + // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = PN->hasConstantValue(true)) { - if (!isa(V) || - properlyDominates(cast(V), PN)) { - if (AST && isa(PN->getType())) - AST->deleteValue(PN); - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - NewPhiNodes.erase(I++); - EliminatedAPHI = true; - continue; - } + if (Value *V = SimplifyInstruction(PN, 0, &DT)) { + if (AST && PN->getType()->isPointerTy()) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; } ++I; } @@ -514,14 +614,14 @@ void PromoteMem2Reg::run() { if (&BB->front() != SomePHI) continue; - // Count the number of preds for BB. - SmallVector Preds(pred_begin(BB), pred_end(BB)); - // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. - if (SomePHI->getNumIncomingValues() == Preds.size()) + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; + + // Get the preds for BB. + SmallVector Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient @@ -559,41 +659,246 @@ void PromoteMem2Reg::run() { NewPhiNodes.clear(); } -// MarkDominatingPHILive - Mem2Reg wants to construct "pruned" SSA form, not -// "minimal" SSA form. To do this, it inserts all of the PHI nodes on the IDF -// as usual (inserting the PHI nodes in the DeadPHINodes set), then processes -// each read of the variable. For each block that reads the variable, this -// function is called, which removes used PHI nodes from the DeadPHINodes set. -// After all of the reads have been processed, any PHI nodes left in the -// DeadPHINodes set are removed. -// -void PromoteMem2Reg::MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum, - SmallPtrSet &DeadPHINodes) { - // Scan the immediate dominators of this block looking for a block which has a - // PHI node for Alloca num. If we find it, mark the PHI node as being alive! - DomTreeNode *IDomNode = DT.getNode(BB); - for (DomTreeNode *IDom = IDomNode; IDom; IDom = IDom->getIDom()) { - BasicBlock *DomBB = IDom->getBlock(); - DenseMap, PHINode*>::iterator - I = NewPhiNodes.find(std::make_pair(DomBB, AllocaNum)); - if (I != NewPhiNodes.end()) { - // Ok, we found an inserted PHI node which dominates this value. - PHINode *DominatingPHI = I->second; - - // Find out if we previously thought it was dead. If so, mark it as being - // live by removing it from the DeadPHINodes set. - if (DeadPHINodes.erase(DominatingPHI)) { - // Now that we have marked the PHI node alive, also mark any PHI nodes - // which it might use as being alive as well. - for (pred_iterator PI = pred_begin(DomBB), PE = pred_end(DomBB); - PI != PE; ++PI) - MarkDominatingPHILive(*PI, AllocaNum, DeadPHINodes); + +/// ComputeLiveInBlocks - Determine which blocks the value is live in. These +/// are blocks which lead to uses. Knowing this allows us to avoid inserting +/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes +/// would be dead). +void PromoteMem2Reg:: +ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet &DefBlocks, + SmallPtrSet &LiveInBlocks) { + + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin(); ; ++I) { + if (StoreInst *SI = dyn_cast(I)) { + if (SI->getOperand(1) != AI) continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i, --e; + break; + } + + if (LoadInst *LI = dyn_cast(I)) { + if (LI->getOperand(0) != AI) continue; + + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + break; + } + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB)) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// DetermineInsertionPoint - At this point, we're committed to promoting the +/// alloca using IDF's, and the standard SSA construction algorithm. Determine +/// which blocks need phi nodes and see if we can optimize out some work by +/// avoiding insertion of dead phi nodes. +void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info) { + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet DefBlocks; + DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. + typedef std::priority_queue, + DomTreeNodeCompare> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector, 32> DFBlocks; + SmallPtrSet Visited; + SmallVector Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); } } } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); +} + +/// RewriteSingleStoreAlloca - If there is only a single store to this value, +/// replace any loads of it that are directly dominated by the definition with +/// the value stored. +void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, + AllocaInfo &Info, + LargeBlockInfo &LBI) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { + Instruction *UserInst = cast(*UI++); + if (!isa(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; + } + LoadInst *LI = cast(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + + } else if (LI->getParent() != StoreBB && + !dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } +} + +namespace { + +/// StoreIndexSearchPredicate - This is a helper predicate used to search by the +/// first element of a pair. +struct StoreIndexSearchPredicate { + bool operator()(const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + } +}; + } -/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic +/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over /// the basic block using the Alloca. @@ -607,112 +912,78 @@ void PromoteMem2Reg::MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum, /// /// ... so long as A is not used before undef is set. /// -bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) { - assert(!AI->use_empty() && "There are no uses of the alloca!"); - - // Handle degenerate cases quickly. - if (AI->hasOneUse()) { - Instruction *U = cast(AI->use_back()); - if (LoadInst *LI = dyn_cast(U)) { - // Must be a load of uninitialized value. - LI->replaceAllUsesWith(UndefValue::get(AI->getAllocatedType())); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - } else { - // Otherwise it must be a store which is never read. - assert(isa(U)); - } - BB->getInstList().erase(U); - } else { - // Uses of the uninitialized memory location shall get undef. - Value *CurVal = 0; - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - if (LoadInst *LI = dyn_cast(Inst)) { - if (LI->getOperand(0) == AI) { - if (!CurVal) return true; // Could not locally promote! - - // Loads just returns the "current value"... - LI->replaceAllUsesWith(CurVal); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - BB->getInstList().erase(LI); - } - } else if (StoreInst *SI = dyn_cast(Inst)) { - if (SI->getOperand(1) == AI) { - // Store updates the "current value"... - CurVal = SI->getOperand(0); - BB->getInstList().erase(SI); - } +void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast(*UI)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // If there are no stores to the alloca, just replace any loads with undef. + if (StoresByIndex.empty()) { + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) + if (LoadInst *LI = dyn_cast(*UI++)) { + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LBI.deleteValue(LI); + LI->eraseFromParent(); } - } + return; } - - // After traversing the basic block, there should be no more uses of the - // alloca, remove it now. - assert(AI->use_empty() && "Uses of alloca from more than one BB??"); - if (AST) AST->deleteValue(AI); - AI->getParent()->getInstList().erase(AI); - ++NumLocalPromoted; - return false; -} - -/// PromoteLocallyUsedAllocas - This method is just like -/// PromoteLocallyUsedAlloca, except that it processes multiple alloca -/// instructions in parallel. This is important in cases where we have large -/// basic blocks, as we don't want to rescan the entire basic block for each -/// alloca which is locally used in it (which might be a lot). -void PromoteMem2Reg:: -PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector &AIs) { - std::map CurValues; - for (unsigned i = 0, e = AIs.size(); i != e; ++i) - CurValues[AIs[i]] = 0; // Insert with null value - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - if (LoadInst *LI = dyn_cast(Inst)) { - // Is this a load of an alloca we are tracking? - if (AllocaInst *AI = dyn_cast(LI->getOperand(0))) { - std::map::iterator AIt = CurValues.find(AI); - if (AIt != CurValues.end()) { - // If loading an uninitialized value, allow the inter-block case to - // handle it. Due to control flow, this might actually be ok. - if (AIt->second == 0) { // Use of locally uninitialized value?? - RetryList.push_back(AI); // Retry elsewhere. - CurValues.erase(AIt); // Stop tracking this here. - if (CurValues.empty()) return; - } else { - // Loads just returns the "current value"... - LI->replaceAllUsesWith(AIt->second); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - BB->getInstList().erase(LI); - } - } - } - } else if (StoreInst *SI = dyn_cast(Inst)) { - if (AllocaInst *AI = dyn_cast(SI->getOperand(1))) { - std::map::iterator AIt = CurValues.find(AI); - if (AIt != CurValues.end()) { - // Store updates the "current value"... - AIt->second = SI->getOperand(0); - BB->getInstList().erase(SI); - } - } + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + std::sort(StoresByIndex.begin(), StoresByIndex.end()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + LoadInst *LI = dyn_cast(*UI++); + if (!LI) continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::pair(LoadIdx, static_cast(0)), + StoreIndexSearchPredicate()); + + // If there is no store before this load, then we can't promote this load. + if (I == StoresByIndex.begin()) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; } + + // Otherwise, there was a store before this load, the load takes its value. + --I; + LI->replaceAllUsesWith(I->second->getOperand(0)); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); } } - - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, - unsigned &Version, - SmallPtrSet &InsertedPHINodes) { + unsigned &Version) { // Look up the basic-block in question. PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; @@ -721,20 +992,18 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = new PHINode(Allocas[AllocaNo]->getAllocatedType(), - Allocas[AllocaNo]->getName() + "." + - utostr(Version++), BB->begin()); + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + BB->begin()); + ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - - InsertedPHINodes.insert(PN); - if (AST && isa(PN->getType())) + if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); return true; } - // RenamePass - Recursively traverse the CFG of the function, renaming loads and // stores to the allocas which we are promoting. IncomingVals indicates what // value each Alloca contains on exit from the predecessor block Pred. @@ -742,31 +1011,25 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, std::vector &Worklist) { +NextIteration: // If we are inserting any phi nodes into this BB, they will already be in the // block. if (PHINode *APN = dyn_cast(BB->begin())) { - // Pred may have multiple edges to BB. If so, we want to add N incoming - // values to each PHI we are inserting on the first time we see the edge. - // Check to see if APN already has incoming values from Pred. This also - // prevents us from modifying PHI nodes that are not currently being - // inserted. - bool HasPredEntries = false; - for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { - if (APN->getIncomingBlock(i) == Pred) { - HasPredEntries = true; - break; - } - } - // If we have PHI nodes to update, compute the number of edges from Pred to // BB. - if (!HasPredEntries) { - TerminatorInst *PredTerm = Pred->getTerminator(); + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + unsigned NumEdges = 0; - for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { - if (PredTerm->getSuccessor(i) == BB) + for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) + if (*I == BB) ++NumEdges; - } assert(NumEdges && "Must be at least one edge from Pred to BB!"); // Add entries for all the phis. @@ -786,16 +1049,9 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, APN = dyn_cast(PNI); if (APN == 0) break; - // Verify it doesn't already have entries for Pred. If it does, it is - // not being inserted by this mem2reg invocation. - HasPredEntries = false; - for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { - if (APN->getIncomingBlock(i) == Pred) { - HasPredEntries = true; - break; - } - } - } while (!HasPredEntries); + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. + } while (APN->getNumOperands() == NewPHINumOperands); } } @@ -806,72 +1062,73 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, Instruction *I = II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast(I)) { - if (AllocaInst *Src = dyn_cast(LI->getPointerOperand())) { - std::map::iterator AI = AllocaLookup.find(Src); - if (AI != AllocaLookup.end()) { - Value *V = IncomingVals[AI->second]; - - // walk the use list of this load and replace all uses with r - LI->replaceAllUsesWith(V); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - BB->getInstList().erase(LI); - } - } + AllocaInst *Src = dyn_cast(LI->getPointerOperand()); + if (!Src) continue; + + DenseMap::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) continue; + + Value *V = IncomingVals[AI->second]; + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast(I)) { // Delete this instruction and mark the name as the current holder of the // value - if (AllocaInst *Dest = dyn_cast(SI->getPointerOperand())) { - std::map::iterator ai = AllocaLookup.find(Dest); - if (ai != AllocaLookup.end()) { - // what value were we writing? - IncomingVals[ai->second] = SI->getOperand(0); - BB->getInstList().erase(SI); - } + AllocaInst *Dest = dyn_cast(SI->getPointerOperand()); + if (!Dest) continue; + + DenseMap::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); } + BB->getInstList().erase(SI); } } - // Recurse to our successors. - TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0; i != TI->getNumSuccessors(); i++) - Worklist.push_back(RenamePassData(TI->getSuccessor(i), BB, IncomingVals)); + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I)) + Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); + + goto NextIteration; } /// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function makes -/// use of DominanceFrontier information. This function does not modify the CFG -/// of the function at all. All allocas must be from the same function. +/// scalar registers, inserting PHI nodes as appropriate. This function does +/// not modify the CFG of the function at all. All allocas must be from the +/// same function. /// /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. /// void llvm::PromoteMemToReg(const std::vector &Allocas, - DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + DominatorTree &DT, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - SmallVector RetryList; - PromoteMem2Reg(Allocas, RetryList, DT, DF, AST).run(); - - // PromoteMem2Reg may not have been able to promote all of the allocas in one - // pass, run it again if needed. - std::vector NewAllocas; - while (!RetryList.empty()) { - // If we need to retry some allocas, this is due to there being no store - // before a read in a local block. To counteract this, insert a store of - // undef into the alloca right after the alloca itself. - for (unsigned i = 0, e = RetryList.size(); i != e; ++i) { - BasicBlock::iterator BBI = RetryList[i]; - - new StoreInst(UndefValue::get(RetryList[i]->getAllocatedType()), - RetryList[i], ++BBI); - } - - NewAllocas.assign(RetryList.begin(), RetryList.end()); - RetryList.clear(); - PromoteMem2Reg(NewAllocas, RetryList, DT, DF, AST).run(); - NewAllocas.clear(); - } + PromoteMem2Reg(Allocas, DT, AST).run(); }