X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=481956f6b4ce039dbb5307cc558372ce9e4c5d06;hb=61d30a821f372ca4185fc518023f857fbcb9c0f5;hp=1f3ecfa2b195152bdabe57bbaed930ae905a626e;hpb=4b55c3b0f17cdf548e45899ba069e454c6342bf1;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 1f3ecfa2b19..481956f6b4c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by the Owen Anderson and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -19,18 +19,22 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" +#include "llvm/ParameterAttributes.h" #include "llvm/Value.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetData.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -51,7 +55,7 @@ namespace { FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, - PTRTOINT, INTTOPTR, BITCAST, GEP, EMPTY, + PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY, TOMBSTONE }; ExpressionOpcode opcode; @@ -60,6 +64,7 @@ namespace { uint32_t secondVN; uint32_t thirdVN; SmallVector varargs; + Value* function; Expression() { } Expression(ExpressionOpcode o) : opcode(o) { } @@ -71,6 +76,8 @@ namespace { return true; else if (type != other.type) return false; + else if (function != other.function) + return false; else if (firstVN != other.firstVN) return false; else if (secondVN != other.secondVN) @@ -96,6 +103,8 @@ namespace { return false; else if (type != other.type) return true; + else if (function != other.function) + return true; else if (firstVN != other.firstVN) return true; else if (secondVN != other.secondVN) @@ -119,6 +128,7 @@ namespace { private: DenseMap valueNumbering; DenseMap expressionNumbering; + AliasAnalysis* AA; uint32_t nextValueNumber; @@ -133,19 +143,22 @@ namespace { Expression create_expression(SelectInst* V); Expression create_expression(CastInst* C); Expression create_expression(GetElementPtrInst* G); + Expression create_expression(CallInst* C); public: - ValueTable() { nextValueNumber = 1; } + ValueTable() : nextValueNumber(1) { } uint32_t lookup_or_add(Value* V); uint32_t lookup(Value* V) const; void add(Value* V, uint32_t num); void clear(); void erase(Value* v); unsigned size(); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + uint32_t hash_operand(Value* v); }; } namespace llvm { -template <> struct DenseMapKeyInfo { +template <> struct DenseMapInfo { static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); } @@ -169,8 +182,15 @@ template <> struct DenseMapKeyInfo { E = 
e.varargs.end(); I != E; ++I) hash = *I + hash * 37; + hash = (unsigned)((uintptr_t)e.function >> 4) ^ + (unsigned)((uintptr_t)e.function >> 9) + + hash * 37; + return hash; } + static bool isEqual(const Expression &LHS, const Expression &RHS) { + return LHS == RHS; + } static bool isPod() { return true; } }; } @@ -322,12 +342,38 @@ Expression::ExpressionOpcode } } +uint32_t ValueTable::hash_operand(Value* v) { + if (CallInst* CI = dyn_cast(v)) + if (!AA->doesNotAccessMemory(CI)) + return nextValueNumber++; + + return lookup_or_add(v); +} + +Expression ValueTable::create_expression(CallInst* C) { + Expression e; + + e.type = C->getType(); + e.firstVN = 0; + e.secondVN = 0; + e.thirdVN = 0; + e.function = C->getCalledFunction(); + e.opcode = Expression::CALL; + + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); + I != E; ++I) + e.varargs.push_back(hash_operand(*I)); + + return e; +} + Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - e.firstVN = lookup_or_add(BO->getOperand(0)); - e.secondVN = lookup_or_add(BO->getOperand(1)); + e.firstVN = hash_operand(BO->getOperand(0)); + e.secondVN = hash_operand(BO->getOperand(1)); e.thirdVN = 0; + e.function = 0; e.type = BO->getType(); e.opcode = getOpcode(BO); @@ -337,9 +383,10 @@ Expression ValueTable::create_expression(BinaryOperator* BO) { Expression ValueTable::create_expression(CmpInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = lookup_or_add(C->getOperand(1)); + e.firstVN = hash_operand(C->getOperand(0)); + e.secondVN = hash_operand(C->getOperand(1)); e.thirdVN = 0; + e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -349,9 +396,10 @@ Expression ValueTable::create_expression(CmpInst* C) { Expression ValueTable::create_expression(CastInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); + e.firstVN = hash_operand(C->getOperand(0)); e.secondVN = 0; e.thirdVN = 0; + e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -361,9 +409,10 @@ Expression ValueTable::create_expression(CastInst* C) { Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - e.firstVN = lookup_or_add(S->getOperand(0)); - e.secondVN = lookup_or_add(S->getOperand(1)); - e.thirdVN = lookup_or_add(S->getOperand(2)); + e.firstVN = hash_operand(S->getOperand(0)); + e.secondVN = hash_operand(S->getOperand(1)); + e.thirdVN = hash_operand(S->getOperand(2)); + e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; @@ -373,9 +422,10 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - e.firstVN = lookup_or_add(E->getOperand(0)); - e.secondVN = lookup_or_add(E->getOperand(1)); + e.firstVN = hash_operand(E->getOperand(0)); + e.secondVN = hash_operand(E->getOperand(1)); e.thirdVN = 0; + e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; @@ -385,9 +435,10 @@ Expression ValueTable::create_expression(ExtractElementInst* E) { Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getOperand(0)); - e.secondVN = lookup_or_add(I->getOperand(1)); - e.thirdVN = lookup_or_add(I->getOperand(2)); + e.firstVN = hash_operand(I->getOperand(0)); + e.secondVN = hash_operand(I->getOperand(1)); + e.thirdVN = hash_operand(I->getOperand(2)); + e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; @@ -397,9 +448,10 @@ Expression 
ValueTable::create_expression(InsertElementInst* I) { Expression ValueTable::create_expression(SelectInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getCondition()); - e.secondVN = lookup_or_add(I->getTrueValue()); - e.thirdVN = lookup_or_add(I->getFalseValue()); + e.firstVN = hash_operand(I->getCondition()); + e.secondVN = hash_operand(I->getTrueValue()); + e.thirdVN = hash_operand(I->getFalseValue()); + e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; @@ -409,15 +461,16 @@ Expression ValueTable::create_expression(SelectInst* I) { Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - e.firstVN = lookup_or_add(G->getPointerOperand()); + e.firstVN = hash_operand(G->getPointerOperand()); e.secondVN = 0; e.thirdVN = 0; + e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); + e.varargs.push_back(hash_operand(*I)); return e; } @@ -433,8 +486,25 @@ uint32_t ValueTable::lookup_or_add(Value* V) { if (VI != valueNumbering.end()) return VI->second; - - if (BinaryOperator* BO = dyn_cast(V)) { + if (CallInst* C = dyn_cast(V)) { + if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory + Expression e = create_expression(C); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + } else if (BinaryOperator* BO = dyn_cast(V)) { Expression e = create_expression(BO); DenseMap::iterator EI = expressionNumbering.find(e); @@ -642,12 +712,20 @@ namespace { DenseMap availableOut; + typedef DenseMap > PhiMapType; + PhiMapType phiMap; + + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } // Helper fuctions @@ -663,10 +741,17 @@ namespace { SmallVector& toErase); bool processNonLocalLoad(LoadInst* L, SmallVector& toErase); + bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep, + SmallVector& toErase); + bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, + SmallVector& toErase); Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig, DenseMap &Phis, bool top_level = false); void dump(DenseMap& d); + bool iterateOnFunction(Function &F); + Value* CollapsePhi(PHINode* p); + bool isSafeReplacement(PHINode* p, Instruction* inst); }; char GVN::ID = 0; @@ -718,6 +803,35 @@ void GVN::dump(DenseMap& d) { printf("}\n"); } +Value* GVN::CollapsePhi(PHINode* p) { + DominatorTree &DT = getAnalysis(); + Value* constVal = p->hasConstantValue(); + + if (constVal) { + if (Instruction* inst = dyn_cast(constVal)) { + if (DT.dominates(inst, p)) + if (isSafeReplacement(p, inst)) + return inst; + } else { + return constVal; + } + } + + return 0; +} + +bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { + if (!isa(inst)) + return true; + + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); + UI != E; ++UI) + if (PHINode* use_phi = dyn_cast(UI)) + if (use_phi->getParent() == 
inst->getParent()) + return false; + + return true; +} /// GetValueForBlock - Get the value to use within the specified basic block. /// available values are in Phis. @@ -726,99 +840,121 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig, bool top_level) { // If we have already computed this value, return the previously computed val. - Value *V = Phis[BB]; - if (V && ! top_level) return V; + DenseMap::iterator V = Phis.find(BB); + if (V != Phis.end() && !top_level) return V->second; BasicBlock* singlePred = BB->getSinglePredecessor(); if (singlePred) { - V = GetValueForBlock(singlePred, orig, Phis); - Phis[BB] = V; - return V; + Value *ret = GetValueForBlock(singlePred, orig, Phis); + Phis[BB] = ret; + return ret; } // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so // now, then get values to fill in the incoming values for the PHI. PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle", BB->begin()); PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB))); - Phis[BB] = PN; - bool all_same = true; - Value* first = 0; + if (Phis.count(BB) == 0) + Phis.insert(std::make_pair(BB, PN)); // Fill in the incoming values for the block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { Value* val = GetValueForBlock(*PI, orig, Phis); - if (first == 0) - first = val; - else if (all_same && first != val) - all_same = false; PN->addIncoming(val, *PI); } + AliasAnalysis& AA = getAnalysis(); + AA.copyValue(orig, PN); - if (all_same) { + // Attempt to collapse PHI nodes that are trivially redundant + Value* v = CollapsePhi(PN); + if (v) { MemoryDependenceAnalysis& MD = getAnalysis(); - + MD.removeInstruction(PN); - PN->replaceAllUsesWith(first); - - SmallVector toRemove; + PN->replaceAllUsesWith(v); + for (DenseMap::iterator I = Phis.begin(), E = Phis.end(); I != E; ++I) if (I->second == PN) - toRemove.push_back(I->first); - for (SmallVector::iterator I = toRemove.begin(), - E= toRemove.end(); I != E; ++I) - Phis[*I] = first; - + I->second = v; + PN->eraseFromParent(); - - Phis[BB] = first; - - return first; + + Phis[BB] = v; + + return v; } + // Cache our phi construction results + phiMap[orig->getPointerOperand()].insert(PN); return PN; } +/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// non-local by performing PHI construction. 
bool GVN::processNonLocalLoad(LoadInst* L, SmallVector& toErase) { MemoryDependenceAnalysis& MD = getAnalysis(); + // Find the non-local dependencies of the load DenseMap deps; MD.getNonLocalDependency(L, deps); DenseMap repl; + + // Filter out useless results (non-locals, etc) for (DenseMap::iterator I = deps.begin(), E = deps.end(); I != E; ++I) if (I->second == MemoryDependenceAnalysis::None) { return false; } else if (I->second == MemoryDependenceAnalysis::NonLocal) { continue; - }else if (StoreInst* S = dyn_cast(I->second)) { + } else if (StoreInst* S = dyn_cast(I->second)) { if (S->getPointerOperand() == L->getPointerOperand()) - repl.insert(std::make_pair(I->first, S->getOperand(0))); + repl[I->first] = S->getOperand(0); else return false; } else if (LoadInst* LD = dyn_cast(I->second)) { if (LD->getPointerOperand() == L->getPointerOperand()) - repl.insert(std::make_pair(I->first, LD)); + repl[I->first] = LD; else return false; } else { return false; } + // Use cached PHI construction information from previous runs + SmallPtrSet& p = phiMap[L->getPointerOperand()]; + for (SmallPtrSet::iterator I = p.begin(), E = p.end(); + I != E; ++I) { + if ((*I)->getParent() == L->getParent()) { + MD.removeInstruction(L); + L->replaceAllUsesWith(*I); + toErase.push_back(L); + NumGVNLoad++; + + return true; + } else { + repl.insert(std::make_pair((*I)->getParent(), *I)); + } + } + + // Perform PHI construction SmallPtrSet visited; Value* v = GetValueForBlock(L->getParent(), L, repl, true); MD.removeInstruction(L); L->replaceAllUsesWith(v); toErase.push_back(L); + NumGVNLoad++; return true; } +/// processLoad - Attempt to eliminate a load, first by eliminating it +/// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst* L, DenseMap& lastLoad, SmallVector& toErase) { @@ -832,12 +968,23 @@ bool GVN::processLoad(LoadInst* L, // ... to a pointer that has been loaded from before... MemoryDependenceAnalysis& MD = getAnalysis(); + bool removedNonLocal = false; Instruction* dep = MD.getDependency(L); if (dep == MemoryDependenceAnalysis::NonLocal && - L->getParent() != &L->getParent()->getParent()->getEntryBlock()) - processNonLocalLoad(L, toErase); + L->getParent() != &L->getParent()->getParent()->getEntryBlock()) { + removedNonLocal = processNonLocalLoad(L, toErase); + + if (!removedNonLocal) + last = L; + + return removedNonLocal; + } + + bool deletedLoad = false; + // Walk up the dependency chain until we either find + // a dependency we can use, or we can't walk any further while (dep != MemoryDependenceAnalysis::None && dep != MemoryDependenceAnalysis::NonLocal && (isa(dep) || isa(dep))) { @@ -874,14 +1021,193 @@ bool GVN::processLoad(LoadInst* L, dep = MD.getDependency(L, dep); } } - + + if (dep != MemoryDependenceAnalysis::None && + dep != MemoryDependenceAnalysis::NonLocal && + isa(dep)) { + // Check that this load is actually from the + // allocation we found + Value* v = L->getOperand(0); + while (true) { + if (BitCastInst *BC = dyn_cast(v)) + v = BC->getOperand(0); + else if (GetElementPtrInst *GEP = dyn_cast(v)) + v = GEP->getOperand(0); + else + break; + } + if (v == dep) { + // If this load depends directly on an allocation, there isn't + // anything stored there; therefore, we can optimize this load + // to undef. 
+ MD.removeInstruction(L); + + L->replaceAllUsesWith(UndefValue::get(L->getType())); + toErase.push_back(L); + deletedLoad = true; + NumGVNLoad++; + } + } + if (!deletedLoad) last = L; return deletedLoad; } -/// buildsets_availout - When calculating availability, handle an instruction +/// isReturnSlotOptznProfitable - Determine if performing a return slot +/// fusion with the slot dest is profitable +static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) { + // We currently consider it profitable if dest is otherwise dead. + SmallVector useList(dest->use_begin(), dest->use_end()); + while (!useList.empty()) { + User* UI = useList.back(); + + if (isa(UI) || isa(UI)) { + useList.pop_back(); + for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); + I != E; ++I) + useList.push_back(*I); + } else if (UI == cpy) + useList.pop_back(); + else + return false; + } + + return true; +} + +/// performReturnSlotOptzn - takes a memcpy and a call that it depends on, +/// and checks for the possibility of a return slot optimization by having +/// the call write its result directly into the callees return parameter +/// rather than using memcpy +bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, + SmallVector& toErase) { + // Deliberately get the source and destination with bitcasts stripped away, + // because we'll need to do type comparisons based on the underlying type. + Value* cpyDest = cpy->getDest(); + Value* cpySrc = cpy->getSource(); + CallSite CS = CallSite::get(C); + + // Since this is a return slot optimization, we need to make sure that + // the value being copied is, in fact, in a return slot. We also need to + // check that the return slot parameter is marked noalias, so that we can + // be sure that changing it will not cause unexpected behavior changes due + // to it being accessed through a global or another parameter. + if (CS.arg_size() == 0 || + cpySrc != CS.getArgument(0) || + !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet)) + return false; + + // Check that something sneaky is not happening involving casting + // return slot types around. + if (CS.getArgument(0)->getType() != cpyDest->getType()) + return false; + // sret --> pointer + const PointerType* PT = cast(cpyDest->getType()); + + // We can only perform the transformation if the size of the memcpy + // is constant and equal to the size of the structure. + ConstantInt* cpyLength = dyn_cast(cpy->getLength()); + if (!cpyLength) + return false; + + TargetData& TD = getAnalysis(); + if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue()) + return false; + + // We only perform the transformation if it will be profitable. + if (!isReturnSlotOptznProfitable(cpyDest, cpy)) + return false; + + // In addition to knowing that the call does not access the return slot + // in some unexpected manner, which we derive from the noalias attribute, + // we also need to know that it does not sneakily modify the destination + // slot in the caller. We don't have parameter attributes to go by + // for this one, so we just rely on AA to figure it out for us. + AliasAnalysis& AA = getAnalysis(); + if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) != + AliasAnalysis::NoModRef) + return false; + + // If all the checks have passed, then we're alright to do the transformation. + CS.setArgument(0, cpyDest); + + // Drop any cached information about the call, because we may have changed + // its dependence information by changing its parameter. 
+ MemoryDependenceAnalysis& MD = getAnalysis(); + MD.dropInstruction(C); + + // Remove the memcpy + MD.removeInstruction(cpy); + toErase.push_back(cpy); + + return true; +} + +/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which +/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be +/// a memcpy from X to Z (or potentially a memmove, depending on circumstances). +/// This allows later passes to remove the first memcpy altogether. +bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep, + SmallVector& toErase) { + // We can only transforms memcpy's where the dest of one is the source of the + // other + if (M->getSource() != MDep->getDest()) + return false; + + // Second, the length of the memcpy's must be the same, or the preceeding one + // must be larger than the following one. + ConstantInt* C1 = dyn_cast(MDep->getLength()); + ConstantInt* C2 = dyn_cast(M->getLength()); + if (!C1 || !C2) + return false; + + uint64_t CpySize = C1->getValue().getZExtValue(); + uint64_t DepSize = C2->getValue().getZExtValue(); + + if (DepSize < CpySize) + return false; + + // Finally, we have to make sure that the dest of the second does not + // alias the source of the first + AliasAnalysis& AA = getAnalysis(); + if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) != + AliasAnalysis::NoAlias) + return false; + else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) != + AliasAnalysis::NoAlias) + return false; + else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize) + != AliasAnalysis::NoAlias) + return false; + + // If all checks passed, then we can transform these memcpy's + Function* MemCpyFun = Intrinsic::getDeclaration( + M->getParent()->getParent()->getParent(), + M->getIntrinsicID()); + + std::vector args; + args.push_back(M->getRawDest()); + args.push_back(MDep->getRawSource()); + args.push_back(M->getLength()); + args.push_back(M->getAlignment()); + + CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M); + + MemoryDependenceAnalysis& MD = getAnalysis(); + if (MD.getDependency(C) == MDep) { + MD.dropInstruction(M); + toErase.push_back(M); + return true; + } else { + MD.removeInstruction(C); + toErase.push_back(C); + return false; + } +} + +/// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction* I, ValueNumberedSet& currAvail, @@ -889,13 +1215,62 @@ bool GVN::processInstruction(Instruction* I, SmallVector& toErase) { if (LoadInst* L = dyn_cast(I)) { return processLoad(L, lastSeenLoad, toErase); + } else if (MemCpyInst* M = dyn_cast(I)) { + MemoryDependenceAnalysis& MD = getAnalysis(); + + // The are two possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE + // b) call-memcpy xform for sret return slot optimization + Instruction* dep = MD.getDependency(M); + if (dep == MemoryDependenceAnalysis::None || + dep == MemoryDependenceAnalysis::NonLocal) + return false; + if (MemCpyInst *MemCpy = dyn_cast(dep)) + return processMemCpy(M, MemCpy, toErase); + if (CallInst* C = dyn_cast(dep)) + return performReturnSlotOptzn(M, C, toErase); + return false; } unsigned num = VN.lookup_or_add(I); - if (currAvail.test(num)) { + // Collapse PHI nodes + if (PHINode* p = dyn_cast(I)) { + Value* constVal = CollapsePhi(p); + + if (constVal) { + for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); + PI != PE; ++PI) + if 
(PI->second.count(p)) + PI->second.erase(p); + + p->replaceAllUsesWith(constVal); + toErase.push_back(p); + } + // Perform value-number based elimination + } else if (currAvail.test(num)) { Value* repl = find_leader(currAvail, num); + if (CallInst* CI = dyn_cast(I)) { + AliasAnalysis& AA = getAnalysis(); + if (!AA.doesNotAccessMemory(CI)) { + MemoryDependenceAnalysis& MD = getAnalysis(); + if (cast(repl)->getParent() != CI->getParent() || + MD.getDependency(CI) != MD.getDependency(cast(repl))) { + // There must be an intervening may-alias store, so nothing from + // this point on will be able to be replaced with the preceding call + currAvail.erase(repl); + currAvail.insert(I); + + return false; + } + } + } + + // Remove it! + MemoryDependenceAnalysis& MD = getAnalysis(); + MD.removeInstruction(I); + VN.erase(I); I->replaceAllUsesWith(repl); toErase.push_back(I); @@ -911,10 +1286,27 @@ bool GVN::processInstruction(Instruction* I, // GVN::runOnFunction - This is the main transformation entry point for a // function. // -bool GVN::runOnFunction(Function &F) { +bool GVN::runOnFunction(Function& F) { + VN.setAliasAnalysis(&getAnalysis()); + + bool changed = false; + bool shouldContinue = true; + + while (shouldContinue) { + shouldContinue = iterateOnFunction(F); + changed |= shouldContinue; + } + + return changed; +} + + +// GVN::iterateOnFunction - Executes one iteration of GVN +bool GVN::iterateOnFunction(Function &F) { // Clean out global sets from any previous functions VN.clear(); availableOut.clear(); + phiMap.clear(); bool changed_function = false; @@ -945,11 +1337,12 @@ bool GVN::runOnFunction(Function &F) { // Avoid iterator invalidation ++BI; - + for (SmallVector::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) + E = toErase.end(); I != E; ++I) { (*I)->eraseFromParent(); - + } + toErase.clear(); } }
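
The call value-numbering added to ValueTable in this patch hinges on one rule: a call may share a value number with a structurally identical call only when alias analysis shows it at most reads memory; any call that may write memory always receives a fresh number. The following stand-alone sketch shows that rule in isolation. It is an illustration only, not LLVM code: the Expr/SimpleValueTable names are invented, a plain std::map stands in for the DenseMap keyed by Expression, and the boolean flag stands in for AliasAnalysis::onlyReadsMemory.

#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

// Sketch only: a simplified expression key. The real Expression also carries
// a type and three fixed operand slots; 'callee' is non-null for calls.
struct Expr {
  int opcode;
  const void* callee;
  std::vector<uint32_t> operands;   // value numbers of the operands
  bool operator<(const Expr& o) const {
    if (opcode != o.opcode) return opcode < o.opcode;
    if (callee != o.callee) return callee < o.callee;
    return operands < o.operands;
  }
};

struct SimpleValueTable {
  std::map<Expr, uint32_t> expressionNumbering;
  uint32_t nextValueNumber = 1;

  // Calls that may write memory never share a number; read-only calls are
  // numbered structurally, like any other expression.
  uint32_t lookup_or_add(const Expr& e, bool onlyReadsMemory) {
    if (!onlyReadsMemory) return nextValueNumber++;
    auto it = expressionNumbering.find(e);
    if (it != expressionNumbering.end()) return it->second;
    expressionNumbering[e] = nextValueNumber;
    return nextValueNumber++;
  }
};

int main() {
  SimpleValueTable VT;
  int f;                                   // stand-in for a callee
  Expr call{/*CALL*/ 42, &f, {1, 2}};
  // Two identical read-only calls share a value number...
  assert(VT.lookup_or_add(call, true) == VT.lookup_or_add(call, true));
  // ...but a call that may write memory is always given a fresh one.
  assert(VT.lookup_or_add(call, false) != VT.lookup_or_add(call, false));
  return 0;
}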
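
processNonLocalLoad and GetValueForBlock eliminate a load whose value is already available, under different names, in every predecessor block, by constructing a PHI of those values. The sketch below shows only the source-level before/after behaviour; the function names and variables are invented, the ternary stands in for the constructed PHI, and the caching in phiMap and the CollapsePhi cleanup are not modelled.

#include <cassert>

// Before: the final load of *p is fully redundant -- on one path the stored
// value is known, on the other the previously loaded value is known.
int loadBefore(int* p, bool c, int a) {
  int b = 0;
  if (c) *p = a;
  else   b = *p;
  return *p;          // redundant load
}

// After PHI construction: the load is replaced by a PHI of the values
// available in each predecessor (modelled here with a ternary).
int loadAfter(int* p, bool c, int a) {
  int b = 0;
  if (c) *p = a;
  else   b = *p;
  return c ? a : b;   // phi(a, b) -- no second load
}

int main() {
  int m1 = 7, m2 = 7;
  assert(loadBefore(&m1, true, 42) == loadAfter(&m2, true, 42));
  int n1 = 7, n2 = 7;
  assert(loadBefore(&n1, false, 42) == loadAfter(&n2, false, 42));
  return 0;
}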
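
performReturnSlotOptzn fuses a call that writes through an sret/noalias return slot with the memcpy that moves the temporary into its real destination, provided the copy covers the whole struct, the temporary is otherwise dead, and alias analysis shows the call cannot touch the destination. Its source-level effect looks like the sketch below; the struct, the callee f, and the buffer names are invented for illustration.

#include <cassert>
#include <cstring>

struct S { int a, b, c, d; };

// Callee in sret style: it fills in whatever return slot it is handed.
void f(S* sret) {
  sret->a = 1; sret->b = 2; sret->c = 3; sret->d = 4;
}

int main() {
  // Before: call into a temporary, then memcpy the temporary into the real
  // destination.
  S tmp, dest1;
  f(&tmp);
  std::memcpy(&dest1, &tmp, sizeof(S));

  // After: when the slot argument is noalias+sret, the copy length equals
  // the struct's store size, 'tmp' is dead apart from the memcpy, and the
  // call cannot otherwise modify the destination, the call may write
  // straight into it and the memcpy disappears.
  S dest2;
  f(&dest2);

  assert(std::memcmp(&dest1, &dest2, sizeof(S)) == 0);
  return 0;
}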
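
processMemCpy rewrites the second of two chained memcpys to copy straight from the original source, turning the first memcpy into a candidate for later dead-store elimination. The stand-alone illustration below shows that rewrite at the source level; the buffer names and sizes are invented, and the legality conditions from the patch (the second copy no longer than the first, and no aliasing among the buffers) hold trivially here.

#include <cassert>
#include <cstring>

int main() {
  char X[8] = "pattern";
  char Y[8];
  char Z[8];

  // Original sequence: memcpy A copies X -> Y, memcpy B copies Y -> Z.
  std::memcpy(Y, X, sizeof(Y));    // memcpy A
  std::memcpy(Z, Y, sizeof(Z));    // memcpy B

  // Transformed sequence: B is rewritten to copy directly from X.  This is
  // legal because B's length is <= A's length and the buffers do not alias;
  // A is now removable by DSE if Y has no other uses.
  char Z2[8];
  std::memcpy(Y, X, sizeof(Y));    // memcpy A (now potentially dead)
  std::memcpy(Z2, X, sizeof(Z2));  // memcpy B, rewritten

  assert(std::memcmp(Z, Z2, sizeof(Z)) == 0);
  return 0;
}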