diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c144e16f48e..45fd665b1cb 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,38 +17,30 @@
 #define DEBUG_TYPE "gvn"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
 using namespace llvm;
 
 STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -60,7 +52,6 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
 
 static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden);
 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
 //===----------------------------------------------------------------------===//
@@ -71,76 +62,23 @@ static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 /// two values.
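The hunks that follow collapse the old per-predicate ExpressionOpcode enum into a bare uint32_t: ordinary instructions keep their native opcode, while compares pack their predicate into the low bits, as in e.opcode = (C->getOpcode() << 8) | C->getPredicate() in the new create_expression. A minimal standalone sketch of that packing, using made-up stand-in constants for LLVM's opcode and predicate values:

#include <cassert>
#include <cstdint>

// Made-up stand-ins for LLVM's Instruction::ICmp opcode and the
// ICmpInst::ICMP_* predicate values; only the packing scheme matters here.
enum : uint32_t { ICmp = 53 };
enum : uint32_t { ICMP_EQ = 32, ICMP_SLT = 40 };

// The rewritten create_expression folds a compare's predicate into the
// expression opcode instead of keeping one enumerator per (opcode,
// predicate) pair.
static uint32_t packedOpcode(uint32_t Opcode, uint32_t Predicate) {
  return (Opcode << 8) | Predicate;
}

int main() {
  // "icmp eq a, b" and "icmp slt a, b" share an instruction opcode but must
  // not value-number together; the packed opcode keeps them distinct.
  assert(packedOpcode(ICmp, ICMP_EQ) != packedOpcode(ICmp, ICMP_SLT));
  return 0;
}

The sentinel opcodes ~0U and ~1U likewise replace the old EMPTY and TOMBSTONE enumerators as DenseMap's empty and tombstone keys.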
namespace { struct Expression { - enum ExpressionOpcode { - ADD = Instruction::Add, - FADD = Instruction::FAdd, - SUB = Instruction::Sub, - FSUB = Instruction::FSub, - MUL = Instruction::Mul, - FMUL = Instruction::FMul, - UDIV = Instruction::UDiv, - SDIV = Instruction::SDiv, - FDIV = Instruction::FDiv, - UREM = Instruction::URem, - SREM = Instruction::SRem, - FREM = Instruction::FRem, - SHL = Instruction::Shl, - LSHR = Instruction::LShr, - ASHR = Instruction::AShr, - AND = Instruction::And, - OR = Instruction::Or, - XOR = Instruction::Xor, - TRUNC = Instruction::Trunc, - ZEXT = Instruction::ZExt, - SEXT = Instruction::SExt, - FPTOUI = Instruction::FPToUI, - FPTOSI = Instruction::FPToSI, - UITOFP = Instruction::UIToFP, - SITOFP = Instruction::SIToFP, - FPTRUNC = Instruction::FPTrunc, - FPEXT = Instruction::FPExt, - PTRTOINT = Instruction::PtrToInt, - INTTOPTR = Instruction::IntToPtr, - BITCAST = Instruction::BitCast, - ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, GEP, CALL, CONSTANT, - INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; - - ExpressionOpcode opcode; + uint32_t opcode; const Type* type; SmallVector varargs; - Value *function; Expression() { } - Expression(ExpressionOpcode o) : opcode(o) { } + Expression(uint32_t o) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == EMPTY || opcode == TOMBSTONE) + else if (opcode == ~0U || opcode == ~1U) return true; else if (type != other.type) return false; - else if (function != other.function) + else if (varargs != other.varargs) return false; - else { - if (varargs.size() != other.varargs.size()) - return false; - - for (size_t i = 0; i < varargs.size(); ++i) - if (varargs[i] != other.varargs[i]) - return false; - - return true; - } - } - - bool operator!=(const Expression &other) const { - return !(*this == other); + return true; } }; @@ -154,20 +92,7 @@ namespace { uint32_t nextValueNumber; - Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); - Expression create_expression(ShuffleVectorInst* V); - Expression create_expression(ExtractElementInst* C); - Expression create_expression(InsertElementInst* V); - Expression create_expression(SelectInst* V); - Expression create_expression(CastInst* C); - Expression create_expression(GetElementPtrInst* G); - Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); - Expression create_expression(ExtractValueInst* C); - Expression create_expression(InsertValueInst* C); - + Expression create_expression(Instruction* I); uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } @@ -176,7 +101,6 @@ namespace { void add(Value *V, uint32_t num); void clear(); void erase(Value *v); - unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } @@ -189,11 +113,11 @@ namespace { namespace llvm { template <> struct DenseMapInfo { static inline Expression getEmptyKey() { - return Expression(Expression::EMPTY); + return ~0U; } static inline Expression getTombstoneKey() { - return Expression(Expression::TOMBSTONE); + return ~1U; } static unsigned 
getHashValue(const Expression e) { @@ -205,20 +129,13 @@ template <> struct DenseMapInfo { for (SmallVector::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - - hash = ((unsigned)((uintptr_t)e.function >> 4) ^ - (unsigned)((uintptr_t)e.function >> 9)) + - hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } }; - -template <> -struct isPodLike { static const bool value = true; }; } @@ -226,184 +143,27 @@ struct isPodLike { static const bool value = true; }; // ValueTable Internal Functions //===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa(C)) { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; - case ICmpInst::ICMP_NE: return Expression::ICMPNE; - case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; - case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; - case ICmpInst::ICMP_ULT: return Expression::ICMPULT; - case ICmpInst::ICMP_ULE: return Expression::ICMPULE; - case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; - case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; - case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; - case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; - } - } else { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; - } - } -} - -Expression ValueTable::create_expression(CallInst* C) { - Expression e; - - e.type = C->getType(); - e.function = C->getCalledFunction(); - e.opcode = Expression::CALL; - - for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - e.varargs.push_back(lookup_or_add(BO->getOperand(0))); - e.varargs.push_back(lookup_or_add(BO->getOperand(1))); - e.function = 0; - e.type = BO->getType(); - e.opcode = static_cast(BO->getOpcode()); - - return e; -} - -Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.varargs.push_back(lookup_or_add(C->getOperand(1))); - e.function = 0; - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(CastInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.function = 0; - e.type = C->getType(); - e.opcode = static_cast(C->getOpcode()); - - return e; -} - -Expression 
ValueTable::create_expression(ShuffleVectorInst* S) { - Expression e; - - e.varargs.push_back(lookup_or_add(S->getOperand(0))); - e.varargs.push_back(lookup_or_add(S->getOperand(1))); - e.varargs.push_back(lookup_or_add(S->getOperand(2))); - e.function = 0; - e.type = S->getType(); - e.opcode = Expression::SHUFFLE; - - return e; -} - -Expression ValueTable::create_expression(ExtractElementInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getOperand(0))); - e.varargs.push_back(lookup_or_add(E->getOperand(1))); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACT; - - return e; -} - -Expression ValueTable::create_expression(InsertElementInst* I) { - Expression e; - - e.varargs.push_back(lookup_or_add(I->getOperand(0))); - e.varargs.push_back(lookup_or_add(I->getOperand(1))); - e.varargs.push_back(lookup_or_add(I->getOperand(2))); - e.function = 0; - e.type = I->getType(); - e.opcode = Expression::INSERT; - - return e; -} -Expression ValueTable::create_expression(SelectInst* I) { +Expression ValueTable::create_expression(Instruction *I) { Expression e; - - e.varargs.push_back(lookup_or_add(I->getCondition())); - e.varargs.push_back(lookup_or_add(I->getTrueValue())); - e.varargs.push_back(lookup_or_add(I->getFalseValue())); - e.function = 0; e.type = I->getType(); - e.opcode = Expression::SELECT; - - return e; -} - -Expression ValueTable::create_expression(GetElementPtrInst* G) { - Expression e; - - e.varargs.push_back(lookup_or_add(G->getPointerOperand())); - e.function = 0; - e.type = G->getType(); - e.opcode = Expression::GEP; - - for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(ExtractValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACTVALUE; - - return e; -} - -Expression ValueTable::create_expression(InsertValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); - for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::INSERTVALUE; - + e.opcode = I->getOpcode(); + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + + if (CmpInst *C = dyn_cast(I)) + e.opcode = (C->getOpcode() << 8) | C->getPredicate(); + else if (ExtractValueInst *E = dyn_cast(I)) { + for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } else if (InsertValueInst *E = dyn_cast(I)) { + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } + return e; } @@ -447,14 +207,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { if (local_dep.isDef()) { CallInst* local_cdep = cast(local_dep.getInst()); - if (local_cdep->getNumOperands() != C->getNumOperands()) { + if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - 
uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -504,13 +264,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { + if (cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -562,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::And: case Instruction::Or : case Instruction::Xor: - exp = create_expression(cast(I)); - break; case Instruction::ICmp: case Instruction::FCmp: - exp = create_expression(cast(I)); - break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -580,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: - exp = create_expression(cast(I)); - break; case Instruction::Select: - exp = create_expression(cast(I)); - break; case Instruction::ExtractElement: - exp = create_expression(cast(I)); - break; case Instruction::InsertElement: - exp = create_expression(cast(I)); - break; case Instruction::ShuffleVector: - exp = create_expression(cast(I)); - break; case Instruction::ExtractValue: - exp = create_expression(cast(I)); - break; case Instruction::InsertValue: - exp = create_expression(cast(I)); - break; case Instruction::GetElementPtr: - exp = create_expression(cast(I)); + exp = create_expression(I); break; default: valueNumbering[V] = nextValueNumber; @@ -647,32 +389,77 @@ void ValueTable::verifyRemoved(const Value *V) const { // GVN Pass //===----------------------------------------------------------------------===// -namespace { - struct ValueNumberScope { - ValueNumberScope* parent; - DenseMap table; - - ValueNumberScope(ValueNumberScope* p) : parent(p) { } - }; -} - namespace { class GVN : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - explicit GVN(bool nopre = false, bool noloads = false) - : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } private: - bool NoPRE; bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetData* TD; ValueTable VN; - DenseMap localAvail; + + /// LeaderTable - A mapping from value numbers to lists of Value*'s that + /// have that value number. Use findLeader to query it. + struct LeaderTableEntry { + Value *Val; + BasicBlock *BB; + LeaderTableEntry *Next; + }; + DenseMap LeaderTable; + BumpPtrAllocator TableAllocator; + + /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for + /// its value number. 
+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry& Curr = LeaderTable[N]; + if (!Curr.Val) { + Curr.Val = V; + Curr.BB = BB; + return; + } + + LeaderTableEntry* Node = TableAllocator.Allocate(); + Node->Val = V; + Node->BB = BB; + Node->Next = Curr.Next; + Curr.Next = Node; + } + + /// removeFromLeaderTable - Scan the list of values corresponding to a given + /// value number, and remove the given value if encountered. + void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry* Prev = 0; + LeaderTableEntry* Curr = &LeaderTable[N]; + + while (Curr->Val != V || Curr->BB != BB) { + Prev = Curr; + Curr = Curr->Next; + } + + if (Prev) { + Prev->Next = Curr->Next; + } else { + if (!Curr->Next) { + Curr->Val = 0; + Curr->BB = 0; + } else { + LeaderTableEntry* Next = Curr->Next; + Curr->Val = Next->Val; + Curr->BB = Next->BB; + Curr->Next = Next->Next; + } + } + } // List of critical edges to be split between iterations. SmallVector, 4> toSplit; @@ -699,9 +486,8 @@ namespace { bool processBlock(BasicBlock *BB); void dump(DenseMap& d); bool iterateOnFunction(Function &F); - Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value *lookupNumber(BasicBlock *BB, uint32_t num); + Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); @@ -711,12 +497,15 @@ namespace { } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { - return new GVN(NoPRE, NoLoads); +FunctionPass *llvm::createGVNPass(bool NoLoads) { + return new GVN(NoLoads); } -static RegisterPass X("gvn", - "Global Value Numbering"); +INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) void GVN::dump(DenseMap& d) { errs() << "{\n"; @@ -728,33 +517,6 @@ void GVN::dump(DenseMap& d) { errs() << "}\n"; } -static bool isSafeReplacement(PHINode* p, Instruction *inst) { - if (!isa(inst)) - return true; - - for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); - UI != E; ++UI) - if (PHINode* use_phi = dyn_cast(UI)) - if (use_phi->getParent() == inst->getParent()) - return false; - - return true; -} - -Value *GVN::CollapsePhi(PHINode *PN) { - Value *ConstVal = PN->hasConstantValue(DT); - if (!ConstVal) return 0; - - Instruction *Inst = dyn_cast(ConstVal); - if (!Inst) - return ConstVal; - - if (DT->dominates(Inst, PN)) - if (isSafeReplacement(PN, Inst)) - return Inst; - return 0; -} - /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -869,7 +631,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, const Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. 
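A subtle fix in the hunk above: the stored value's size is now measured with getTypeStoreSizeInBits rather than getTypeSizeInBits. The two agree for byte-multiple types such as i32, but a type like i1 or i36 occupies more bits in memory than its nominal width, and it is the in-memory size that matters when deciding whether a reload can reuse the stored bits. A small sketch of the rounding rule, using a hypothetical helper name; the same (bits + 7) / 8 idiom appears later in this patch in GetStoreValueForLoad:

#include <cassert>
#include <cstdint>

// Toy version of TargetData's rounding: the number of bits actually
// written to memory when a value of BitWidth bits is stored, i.e. the
// width rounded up to a whole number of bytes.
static uint64_t typeStoreSizeInBits(uint64_t BitWidth) {
  return ((BitWidth + 7) / 8) * 8;
}

int main() {
  assert(typeStoreSizeInBits(32) == 32); // i32: store size == bit size
  assert(typeStoreSizeInBits(1)  == 8);  // i1: occupies a full byte in memory
  assert(typeStoreSizeInBits(36) == 40); // i36: five bytes, not 36 bits
  return 0;
}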
@@ -938,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } -/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can -/// be expressed as a base pointer plus a constant offset. Return the base and -/// offset to the caller. -static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const TargetData &TD) { - Operator *PtrOp = dyn_cast(Ptr); - if (PtrOp == 0) return Ptr; - - // Just look through bitcasts. - if (PtrOp->getOpcode() == Instruction::BitCast) - return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); - - // If this is a GEP with constant indices, we can look through it. - GEPOperator *GEP = dyn_cast(PtrOp); - if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = cast(*I); - if (OpC->isZero()) continue; - - // Handle a struct and array indices which add their offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += OpC->getSExtValue()*Size; - } - } - - // Re-sign extend from the pointer size if needed to get overflow edge cases - // right. - unsigned PtrSize = TD.getPointerSizeInBits(); - if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); - - return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); -} - - /// AnalyzeLoadFromClobberingWrite - This function is called when we have a /// memdep query of a load that ends up being a clobbering memory write (store, /// memset, memcpy, memmove). This means that the write *may* provide bits used @@ -997,32 +718,29 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); - Value *LoadBase = - GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; // If the load and store are to the exact same address, they should have been // a must alias. AA must have gotten confused. - // FIXME: Study to see if/when this happens. - if (LoadOffset == StoreOffset) { + // FIXME: Study to see if/when this happens. One case is forwarding a memset + // to a load from the base of the memset. #if 0 + if (LoadOffset == StoreOffset) { dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" << "Load Ptr = " << *LoadPtr << "\n"; abort(); -#endif - return -1; } +#endif // If the load and store don't overlap at all, the store doesn't provide // anything to the load. In this case, they really don't alias at all, AA // must have gotten confused. - // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then - // remove this check, as it is duplicated with what we have below. 
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); if ((WriteSizeInBits & 7) | (LoadSize & 7)) @@ -1032,11 +750,11 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, bool isAAFailure = false; - if (StoreOffset < LoadOffset) { + if (StoreOffset < LoadOffset) isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; - } else { + else isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; - } + if (isAAFailure) { #if 0 dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" @@ -1068,12 +786,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. - if (DepSI->getOperand(0)->getType()->isStructTy() || - DepSI->getOperand(0)->getType()->isArrayTy()) + if (DepSI->getValueOperand()->getType()->isStructTy() || + DepSI->getValueOperand()->getType()->isArrayTy()) return -1; Value *StorePtr = DepSI->getPointerOperand(); - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType()); return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, TD); } @@ -1100,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, Constant *Src = dyn_cast(MTI->getSource()); if (Src == 0) return -1; - GlobalVariable *GV = dyn_cast(Src->getUnderlyingObject()); + GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, &TD)); if (GV == 0 || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. @@ -1133,8 +851,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; - uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); @@ -1218,7 +936,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } - +namespace { struct AvailableValueInBlock { /// BB - The basic block in question. @@ -1292,6 +1010,8 @@ struct AvailableValueInBlock { } }; +} + /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. @@ -1309,7 +1029,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // Otherwise, we have to construct SSA form. SmallVector NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); const Type *LoadTy = LI->getType(); @@ -1330,12 +1050,21 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (V->getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); + + // Now that we've copied information to the new PHIs, scan through + // them again and inform alias analysis that we've added potentially + // escaping uses to any values that are operands to these PHIs. 
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { + PHINode *P = NewPHIs[i]; + for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) + AA->addEscapingUse(P->getOperandUse(2*ii)); + } return V; } -static bool isLifetimeStart(Instruction *Inst) { - if (IntrinsicInst* II = dyn_cast(Inst)) +static bool isLifetimeStart(const Instruction *Inst) { + if (const IntrinsicInst* II = dyn_cast(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } @@ -1346,8 +1075,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl &toErase) { // Find the non-local dependencies of the load. SmallVector Deps; - MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), - Deps); + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); @@ -1375,8 +1104,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVector ValuesPerBlock; SmallVector UnavailableBlocks; - const TargetData *TD = 0; - for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1391,14 +1118,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - DepSI->getOperand(0), + DepSI->getValueOperand(), Offset)); continue; } @@ -1408,8 +1133,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. if (MemIntrinsic *DepMI = dyn_cast(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, DepMI, *TD); @@ -1439,13 +1162,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (StoreInst *S = dyn_cast(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. - if (S->getOperand(0)->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable(); - + if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(), *TD)) { UnavailableBlocks.push_back(DepBB); continue; @@ -1453,16 +1173,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - S->getOperand(0))); + S->getValueOperand())); continue; } if (LoadInst *LD = dyn_cast(DepInst)) { // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable(); - // If the stored value is larger or equal to the loaded value, we can // reuse it. 
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ @@ -1497,8 +1214,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1531,24 +1249,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; if (Blockers.count(TmpBB)) return false; + + // If any of these blocks has more than one successor (i.e. if the edge we + // just traversed was critical), then there are other paths through this + // block along which the load may not be anticipated. Hoisting the load + // above this block would be adding the load to execution paths along + // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) - allSingleSucc = false; + return false; } assert(TmpBB); LoadBB = TmpBB; - // If we have a repl set with LI itself in it, this means we have a loop where - // at least one of the values is LI. Since this means that we won't be able - // to eliminate LI even if we insert uses in the other predecessors, we will - // end up increasing code size. Reject this by scanning for LI. - if (!EnableFullLoadPRE) { - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) - return false; - } - // FIXME: It is extremely unclear what this loop is doing, other than // artificially restricting loadpre. if (isSinglePred) { @@ -1580,6 +1293,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; + SmallVector, 4> NeedToSplit; for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *Pred = *PI; @@ -1594,24 +1308,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, << Pred->getName() << "': " << *LI << '\n'); return false; } - unsigned SuccNum = SuccessorNumber(Pred, LoadBB); - toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); - return false; + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); + NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); } } + if (!NeedToSplit.empty()) { + toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); + return false; + } // Decide whether PRE is profitable for this load. unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - if (!EnableFullLoadPRE) { - // If this load is unavailable in multiple predecessors, reject it. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - if (NumUnavailablePreds != 1) + + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) return false; - } // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; @@ -1626,26 +1342,21 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. 
- PHITransAddr Address(LI->getOperand(0), TD); + PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; if (allSingleSucc) { LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); } else { - Address.PHITranslateValue(LoadBB, UnavailablePred); + Address.PHITranslateValue(LoadBB, UnavailablePred, DT); LoadPtr = Address.getAddr(); - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null(LoadPtr)) - if (!DT->dominates(Inst->getParent(), UnavailablePred)) - LoadPtr = 0; } // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (LoadPtr == 0) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " - << *LI->getOperand(0) << "\n"); + << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } @@ -1654,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // @1 = getelementptr (i8* p, ... // test p and branch if == 0 // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. + // It is valid to have the getelementptr before the test, even if p can + // be 0, as getelementptr only does address arithmetic. // If we are not pushing the value through any multiple-successor blocks // we do not have this case. Otherwise, check that the load is safe to // put anywhere; this can be improved, but should be conservatively safe. @@ -1672,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } if (!CanDoPRE) { - while (!NewInsts.empty()) - NewInsts.pop_back_val()->eraseFromParent(); + while (!NewInsts.empty()) { + Instruction *I = NewInsts.pop_back_val(); + if (MD) MD->removeInstruction(I); + I->eraseFromParent(); + } return false; } @@ -1699,13 +1413,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI, BasicBlock *UnavailablePred = I->first; Value *LoadPtr = I->second; - Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, - LI->getAlignment(), - UnavailablePred->getTerminator()); + Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Transfer the old load's TBAA tag to the new load. + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) + NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag); // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, NewLoad)); + MD->invalidateCachedPointerInfo(LoadPtr); + DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); } // Perform PHI construction. @@ -1716,8 +1436,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); - NumPRELoad++; + ++NumPRELoad; return true; } @@ -1747,19 +1468,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // access code. Value *AvailVal = 0; if (StoreInst *DepSI = dyn_cast(Dep.getInst())) - if (const TargetData *TD = getAnalysisIfAvailable()) { + if (TD) { int Offset = AnalyzeLoadFromClobberingStore(L->getType(), L->getPointerOperand(), DepSI, *TD); if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, L->getType(), L, *TD); } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast(Dep.getInst())) { - if (const TargetData *TD = getAnalysisIfAvailable()) { + if (TD) { int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), L->getPointerOperand(), DepMI, *TD); @@ -1776,8 +1497,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); + VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1797,14 +1519,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { - Value *StoredVal = DepSI->getOperand(0); + Value *StoredVal = DepSI->getValueOperand(); // The store and load are to a must-aliased pointer, but they may not // actually have the same type. See if we know how to reuse the stored // value (depending on its type). - const TargetData *TD = 0; if (StoredVal->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable())) { + if (TD) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *TD); if (StoredVal == 0) @@ -1821,8 +1542,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); + VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1832,9 +1554,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // The loads are of a must-aliased pointer, but they may not actually have // the same type. See if we know how to reuse the previously loaded value // (depending on its type). - const TargetData *TD = 0; if (DepLI->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable())) { + if (TD) { AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); if (AvailableVal == 0) return false; @@ -1850,8 +1571,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); + VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1860,8 +1582,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // intervening stores, for example. if (isa(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1870,8 +1593,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { if (IntrinsicInst* II = dyn_cast(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } } @@ -1879,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { return false; } -Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { - DenseMap::iterator I = localAvail.find(BB); - if (I == localAvail.end()) - return 0; - - ValueNumberScope *Locals = I->second; - while (Locals) { - DenseMap::iterator I = Locals->table.find(num); - if (I != Locals->table.end()) - return I->second; - Locals = Locals->parent; +// findLeader - In order to find a leader for a given value number at a +// specific basic block, we first obtain the list of all Values for that number, +// and then scan the list to find one whose block dominates the block in +// question. 
This is fast because dominator tree queries consist of only +// a few comparisons of DFS numbers. +Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { + LeaderTableEntry Vals = LeaderTable[num]; + if (!Vals.Val) return 0; + + Value *Val = 0; + if (DT->dominates(Vals.BB, BB)) { + Val = Vals.Val; + if (isa(Val)) return Val; + } + + LeaderTableEntry* Next = Vals.Next; + while (Next) { + if (DT->dominates(Next->BB, BB)) { + if (isa(Next->Val)) return Next->Val; + if (!Val) Val = Next->Val; + } + + Next = Next->Next; } - return 0; + return Val; } @@ -1904,85 +1640,92 @@ bool GVN::processInstruction(Instruction *I, if (isa(I)) return false; + // If the instruction can be easily simplified then do so now in preference + // to value numbering it. Value numbering often exposes redundancies, for + // example if it determines that %y is equal to %x then the instruction + // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. + if (Value *V = SimplifyInstruction(I, TD, DT)) { + I->replaceAllUsesWith(V); + if (MD && V->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + VN.erase(I); + toErase.push_back(I); + return true; + } + if (LoadInst *LI = dyn_cast(I)) { bool Changed = processLoad(LI, toErase); if (!Changed) { unsigned Num = VN.lookup_or_add(LI); - localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); + addToLeaderTable(Num, LI, LI->getParent()); } return Changed; } - uint32_t NextNum = VN.getNextUnusedValueNumber(); - unsigned Num = VN.lookup_or_add(I); - + // For conditions branches, we can perform simple conditional propagation on + // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - if (!BI->isConditional() || isa(BI->getCondition())) return false; - + Value *BranchCond = BI->getCondition(); uint32_t CondVN = VN.lookup_or_add(BranchCond); - + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); - + if (TrueSucc->getSinglePredecessor()) - localAvail[TrueSucc]->table[CondVN] = - ConstantInt::getTrue(TrueSucc->getContext()); + addToLeaderTable(CondVN, + ConstantInt::getTrue(TrueSucc->getContext()), + TrueSucc); if (FalseSucc->getSinglePredecessor()) - localAvail[FalseSucc]->table[CondVN] = - ConstantInt::getFalse(TrueSucc->getContext()); - + addToLeaderTable(CondVN, + ConstantInt::getFalse(TrueSucc->getContext()), + FalseSucc); + return false; + } + + // Instructions with void type don't return a value, so there's + // no point in trying to find redudancies in them. + if (I->getType()->isVoidTy()) return false; + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); // Allocations are always uniquely numbered, so we can save time and memory // by fast failing them. - } else if (isa(I) || isa(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (isa(I) || isa(I) || isa(I)) { + addToLeaderTable(Num, I, I->getParent()); return false; } - // Collapse PHI nodes - if (PHINode* p = dyn_cast(I)) { - Value *constVal = CollapsePhi(p); - - if (constVal) { - p->replaceAllUsesWith(constVal); - if (MD && constVal->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(constVal); - VN.erase(p); - - toErase.push_back(p); - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - } - // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! 
- } else if (Num == NextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - + if (Num == NextNum) { + addToLeaderTable(Num, I, I->getParent()); + return false; + } + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value *repl = lookupNumber(I->getParent(), Num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (MD && repl->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; - - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + Value *repl = findLeader(I->getParent(), Num); + if (repl == 0) { + // Failure, just remember this instance for future use. + addToLeaderTable(Num, I, I->getParent()); + return false; } - - return false; + + // Remove it! + VN.erase(I); + I->replaceAllUsesWith(repl); + if (MD && repl->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(repl); + toErase.push_back(I); + return true; } /// runOnFunction - This is the main transformation entry point for a function. @@ -1990,6 +1733,7 @@ bool GVN::runOnFunction(Function& F) { if (!NoLoads) MD = &getAnalysis(); DT = &getAnalysis(); + TD = getAnalysisIfAvailable(); VN.setAliasAnalysis(&getAnalysis()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2000,16 +1744,15 @@ bool GVN::runOnFunction(Function& F) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = FI; - ++FI; + BasicBlock *BB = FI++; + bool removedBlock = MergeBlockIntoPredecessor(BB, this); - if (removedBlock) NumGVNBlocks++; + if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; } unsigned Iteration = 0; - while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); @@ -2099,6 +1842,11 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa(CurInst)) continue; + + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast(CurInst)) + if (CallI->isInlineAsm()) + continue; uint32_t ValNo = VN.lookup(CurInst); @@ -2115,27 +1863,27 @@ bool GVN::performPRE(Function &F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; // We're not interested in PRE where the block is its // own predecessor, or in blocks with predecessors // that are not reachable. - if (*PI == CurrentBlock) { + if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!localAvail.count(*PI)) { + } else if (!DT->dominates(&F.getEntryBlock(), P)) { NumWithout = 2; break; } - DenseMap::iterator predV = - localAvail[*PI]->table.find(ValNo); - if (predV == localAvail[*PI]->table.end()) { - PREPred = *PI; - NumWithout++; - } else if (predV->second == CurInst) { + Value* predV = findLeader(P, ValNo); + if (predV == 0) { + PREPred = P; + ++NumWithout; + } else if (predV == CurInst) { NumWithout = 2; } else { - predMap[*PI] = predV->second; - NumWith++; + predMap[P] = predV; + ++NumWith; } } @@ -2151,7 +1899,7 @@ bool GVN::performPRE(Function &F) { // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. 
- unsigned SuccNum = SuccessorNumber(PREPred, CurrentBlock); + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; @@ -2169,7 +1917,7 @@ bool GVN::performPRE(Function &F) { if (isa(Op) || isa(Op) || isa(Op)) continue; - if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { + if (Value *V = findLeader(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { success = false; @@ -2190,26 +1938,37 @@ bool GVN::performPRE(Function &F) { PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); - NumGVNPRE++; + ++NumGVNPRE; // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. - PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) - Phi->addIncoming(predMap[*PI], *PI); + for (pred_iterator PI = PB; PI != PE; ++PI) { + BasicBlock *P = *PI; + Phi->addIncoming(predMap[P], P); + } VN.add(Phi, ValNo); - localAvail[CurrentBlock]->table[ValNo] = Phi; + addToLeaderTable(ValNo, Phi, CurrentBlock); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(Phi); + if (Phi->getType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. 
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii)); + + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); @@ -2234,23 +1993,14 @@ bool GVN::splitCriticalEdges() { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, this); } while (!toSplit.empty()); - MD->invalidateCachedPredecessors(); + if (MD) MD->invalidateCachedPredecessors(); return true; } /// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); - - for (df_iterator DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - if (DI->getIDom()) - localAvail[DI->getBlock()] = - new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); - else - localAvail[DI->getBlock()] = new ValueNumberScope(0); - } - + // Top-down walk of the dominator tree bool Changed = false; #if 0 @@ -2270,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); - - for (DenseMap::iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) - delete I->second; - localAvail.clear(); + LeaderTable.clear(); + TableAllocator.Reset(); } /// verifyRemoved - Verify that the specified instruction does not occur in our @@ -2284,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. - for (DenseMap::const_iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { - const ValueNumberScope *VNS = I->second; - - while (VNS) { - for (DenseMap::const_iterator - II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { - assert(II->second != Inst && "Inst still in value numbering scope!"); - } - - VNS = VNS->parent; + for (DenseMap::const_iterator + I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { + const LeaderTableEntry *Node = &I->second; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); + + while (Node->Next) { + Node = Node->Next; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); } } }
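The structural heart of the patch is the replacement of the per-block ValueNumberScope chain with the flat LeaderTable shown above. Below is a self-contained sketch of the scheme, with toy Value and BasicBlock types and dominance decided by DFS interval containment, the "few comparisons of DFS numbers" the findLeader comment refers to; all names and numbers are illustrative, not LLVM's:

#include <cstdint>
#include <cstdio>
#include <map>

// Toy stand-ins for llvm::BasicBlock and llvm::Value. Dominance is decided
// by DFS interval containment, the same trick DominatorTree::dominates uses.
struct BasicBlock { unsigned DFSIn, DFSOut; };
struct Value { const char *Name; };

static bool dominates(const BasicBlock *A, const BasicBlock *B) {
  return A->DFSIn <= B->DFSIn && B->DFSOut <= A->DFSOut;
}

// One node in the per-value-number list of available leaders.
struct LeaderTableEntry {
  Value *Val = nullptr;
  BasicBlock *BB = nullptr;
  LeaderTableEntry *Next = nullptr;
};

static std::map<uint32_t, LeaderTableEntry> LeaderTable;

static void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
  LeaderTableEntry &Curr = LeaderTable[N];
  if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; }
  // GVN bump-allocates these nodes from a BumpPtrAllocator; plain new is
  // good enough for a sketch.
  LeaderTableEntry *Node = new LeaderTableEntry;
  Node->Val = V;
  Node->BB = BB;
  Node->Next = Curr.Next;
  Curr.Next = Node;
}

// Scan the list for an entry whose block dominates the query block. (The
// real findLeader also prefers a Constant leader when one exists.)
static Value *findLeader(BasicBlock *BB, uint32_t Num) {
  for (LeaderTableEntry *E = &LeaderTable[Num]; E && E->Val; E = E->Next)
    if (dominates(E->BB, BB))
      return E->Val;
  return nullptr;
}

int main() {
  // Entry dominates everything; Left and Right are sibling successors.
  BasicBlock Entry{1, 8}, Left{2, 3}, Right{4, 7};
  Value XEntry{"x.entry"}, XLeft{"x.left"};
  addToLeaderTable(7, &XEntry, &Entry);
  addToLeaderTable(7, &XLeft, &Left);
  std::printf("%s\n", findLeader(&Right, 7)->Name); // x.entry
  std::printf("%s\n", findLeader(&Left, 7)->Name);  // first dominating entry
  return 0;
}

Compared with walking a parent chain of scopes, a lookup here touches only the entries that actually share a value number, and erasing an instruction only scans that one short list.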