When performing return slot optimization, remember to inform memdep when we're removi...

[oota-llvm.git] / lib / Transforms / Scalar / GVN.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index dfe0dff3ad69e3b4c406423ff71d4492a41a4f11..481956f6b4ce039dbb5307cc558372ce9e4c5d06 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2,8 +2,8 @@
  //
  //                     The LLVM Compiler Infrastructure
  //
  //
  //                     The LLVM Compiler Infrastructure
  //
-// This file was developed by the Owen Anderson and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
  //
  //
  //===----------------------------------------------------------------------===//
  //
@@ -19,18 +19,22 @@
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/Function.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
  #include "llvm/Instructions.h"
  #include "llvm/Instructions.h"
+#include "llvm/ParameterAttributes.h"
  #include "llvm/Value.h"
  #include "llvm/Value.h"
-#include "llvm/Analysis/Dominators.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/Support/CFG.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/Support/CFG.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetData.h"
  using namespace llvm;
  
  //===----------------------------------------------------------------------===//
  using namespace llvm;
  
  //===----------------------------------------------------------------------===//
@@ -51,7 +55,7 @@ namespace {
                              FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                              SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                              FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 
                              FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                              SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                              FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 
-                            PTRTOINT, INTTOPTR, BITCAST, GEP, EMPTY,
+                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
                              TOMBSTONE };
  
      ExpressionOpcode opcode;
                              TOMBSTONE };
  
      ExpressionOpcode opcode;
@@ -60,6 +64,7 @@ namespace {
      uint32_t secondVN;
      uint32_t thirdVN;
      SmallVector<uint32_t, 4> varargs;
      uint32_t secondVN;
      uint32_t thirdVN;
      SmallVector<uint32_t, 4> varargs;
+    Value* function;
    
      Expression() { }
      Expression(ExpressionOpcode o) : opcode(o) { }
    
      Expression() { }
      Expression(ExpressionOpcode o) : opcode(o) { }
@@ -71,6 +76,8 @@ namespace {
          return true;
        else if (type != other.type)
          return false;
          return true;
        else if (type != other.type)
          return false;
+      else if (function != other.function)
+        return false;
        else if (firstVN != other.firstVN)
          return false;
        else if (secondVN != other.secondVN)
        else if (firstVN != other.firstVN)
          return false;
        else if (secondVN != other.secondVN)
@@ -96,6 +103,8 @@ namespace {
          return false;
        else if (type != other.type)
          return true;
          return false;
        else if (type != other.type)
          return true;
+      else if (function != other.function)
+        return true;
        else if (firstVN != other.firstVN)
          return true;
        else if (secondVN != other.secondVN)
        else if (firstVN != other.firstVN)
          return true;
        else if (secondVN != other.secondVN)
@@ -119,6 +128,7 @@ namespace {
      private:
        DenseMap<Value*, uint32_t> valueNumbering;
        DenseMap<Expression, uint32_t> expressionNumbering;
      private:
        DenseMap<Value*, uint32_t> valueNumbering;
        DenseMap<Expression, uint32_t> expressionNumbering;
+      AliasAnalysis* AA;
    
        uint32_t nextValueNumber;
      
    
        uint32_t nextValueNumber;
      
@@ -133,21 +143,29 @@ namespace {
        Expression create_expression(SelectInst* V);
        Expression create_expression(CastInst* C);
        Expression create_expression(GetElementPtrInst* G);
        Expression create_expression(SelectInst* V);
        Expression create_expression(CastInst* C);
        Expression create_expression(GetElementPtrInst* G);
+      Expression create_expression(CallInst* C);
      public:
      public:
-      ValueTable() { nextValueNumber = 1; }
+      ValueTable() : nextValueNumber(1) { }
        uint32_t lookup_or_add(Value* V);
        uint32_t lookup(Value* V) const;
        void add(Value* V, uint32_t num);
        void clear();
        void erase(Value* v);
        unsigned size();
        uint32_t lookup_or_add(Value* V);
        uint32_t lookup(Value* V) const;
        void add(Value* V, uint32_t num);
        void clear();
        void erase(Value* v);
        unsigned size();
+      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+      uint32_t hash_operand(Value* v);
    };
  }
  
  namespace llvm {
    };
  }
  
  namespace llvm {
-template <> struct DenseMapKeyInfo<Expression> {
-  static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); }
-  static inline Expression getTombstoneKey() { return Expression(Expression::TOMBSTONE); }
+template <> struct DenseMapInfo<Expression> {
+  static inline Expression getEmptyKey() {
+    return Expression(Expression::EMPTY);
+  }
+  
+  static inline Expression getTombstoneKey() {
+    return Expression(Expression::TOMBSTONE);
+  }
    
    static unsigned getHashValue(const Expression e) {
      unsigned hash = e.opcode;
    
    static unsigned getHashValue(const Expression e) {
      unsigned hash = e.opcode;
@@ -160,12 +178,19 @@ template <> struct DenseMapKeyInfo<Expression> {
              (unsigned)((uintptr_t)e.type >> 9) +
              hash * 37;
      
              (unsigned)((uintptr_t)e.type >> 9) +
              hash * 37;
      
-    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end();
-         I != E; ++I)
+    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+         E = e.varargs.end(); I != E; ++I)
        hash = *I + hash * 37;
      
        hash = *I + hash * 37;
      
+    hash = (unsigned)((uintptr_t)e.function >> 4) ^
+            (unsigned)((uintptr_t)e.function >> 9) +
+            hash * 37;
+    
      return hash;
    }
      return hash;
    }
+  static bool isEqual(const Expression &LHS, const Expression &RHS) {
+    return LHS == RHS;
+  }
    static bool isPod() { return true; }
  };
  }
    static bool isPod() { return true; }
  };
  }
@@ -317,12 +342,38 @@ Expression::ExpressionOpcode
    }
  }
  
    }
  }
  
+uint32_t ValueTable::hash_operand(Value* v) {
+  if (CallInst* CI = dyn_cast<CallInst>(v))
+    if (!AA->doesNotAccessMemory(CI))
+      return nextValueNumber++;
+  
+  return lookup_or_add(v);
+}
+
+Expression ValueTable::create_expression(CallInst* C) {
+  Expression e;
+  
+  e.type = C->getType();
+  e.firstVN = 0;
+  e.secondVN = 0;
+  e.thirdVN = 0;
+  e.function = C->getCalledFunction();
+  e.opcode = Expression::CALL;
+  
+  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+       I != E; ++I)
+    e.varargs.push_back(hash_operand(*I));
+  
+  return e;
+}
+
  Expression ValueTable::create_expression(BinaryOperator* BO) {
    Expression e;
      
  Expression ValueTable::create_expression(BinaryOperator* BO) {
    Expression e;
      
-  e.firstVN = lookup_or_add(BO->getOperand(0));
-  e.secondVN = lookup_or_add(BO->getOperand(1));
+  e.firstVN = hash_operand(BO->getOperand(0));
+  e.secondVN = hash_operand(BO->getOperand(1));
    e.thirdVN = 0;
    e.thirdVN = 0;
+  e.function = 0;
    e.type = BO->getType();
    e.opcode = getOpcode(BO);
    
    e.type = BO->getType();
    e.opcode = getOpcode(BO);
    
@@ -332,9 +383,10 @@ Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression ValueTable::create_expression(CmpInst* C) {
    Expression e;
      
  Expression ValueTable::create_expression(CmpInst* C) {
    Expression e;
      
-  e.firstVN = lookup_or_add(C->getOperand(0));
-  e.secondVN = lookup_or_add(C->getOperand(1));
+  e.firstVN = hash_operand(C->getOperand(0));
+  e.secondVN = hash_operand(C->getOperand(1));
    e.thirdVN = 0;
    e.thirdVN = 0;
+  e.function = 0;
    e.type = C->getType();
    e.opcode = getOpcode(C);
    
    e.type = C->getType();
    e.opcode = getOpcode(C);
    
@@ -344,9 +396,10 @@ Expression ValueTable::create_expression(CmpInst* C) {
  Expression ValueTable::create_expression(CastInst* C) {
    Expression e;
      
  Expression ValueTable::create_expression(CastInst* C) {
    Expression e;
      
-  e.firstVN = lookup_or_add(C->getOperand(0));
+  e.firstVN = hash_operand(C->getOperand(0));
    e.secondVN = 0;
    e.thirdVN = 0;
    e.secondVN = 0;
    e.thirdVN = 0;
+  e.function = 0;
    e.type = C->getType();
    e.opcode = getOpcode(C);
    
    e.type = C->getType();
    e.opcode = getOpcode(C);
    
@@ -356,9 +409,10 @@ Expression ValueTable::create_expression(CastInst* C) {
  Expression ValueTable::create_expression(ShuffleVectorInst* S) {
    Expression e;
      
  Expression ValueTable::create_expression(ShuffleVectorInst* S) {
    Expression e;
      
-  e.firstVN = lookup_or_add(S->getOperand(0));
-  e.secondVN = lookup_or_add(S->getOperand(1));
-  e.thirdVN = lookup_or_add(S->getOperand(2));
+  e.firstVN = hash_operand(S->getOperand(0));
+  e.secondVN = hash_operand(S->getOperand(1));
+  e.thirdVN = hash_operand(S->getOperand(2));
+  e.function = 0;
    e.type = S->getType();
    e.opcode = Expression::SHUFFLE;
    
    e.type = S->getType();
    e.opcode = Expression::SHUFFLE;
    
@@ -368,9 +422,10 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression ValueTable::create_expression(ExtractElementInst* E) {
    Expression e;
      
  Expression ValueTable::create_expression(ExtractElementInst* E) {
    Expression e;
      
-  e.firstVN = lookup_or_add(E->getOperand(0));
-  e.secondVN = lookup_or_add(E->getOperand(1));
+  e.firstVN = hash_operand(E->getOperand(0));
+  e.secondVN = hash_operand(E->getOperand(1));
    e.thirdVN = 0;
    e.thirdVN = 0;
+  e.function = 0;
    e.type = E->getType();
    e.opcode = Expression::EXTRACT;
    
    e.type = E->getType();
    e.opcode = Expression::EXTRACT;
    
@@ -380,9 +435,10 @@ Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression ValueTable::create_expression(InsertElementInst* I) {
    Expression e;
      
  Expression ValueTable::create_expression(InsertElementInst* I) {
    Expression e;
      
-  e.firstVN = lookup_or_add(I->getOperand(0));
-  e.secondVN = lookup_or_add(I->getOperand(1));
-  e.thirdVN = lookup_or_add(I->getOperand(2));
+  e.firstVN = hash_operand(I->getOperand(0));
+  e.secondVN = hash_operand(I->getOperand(1));
+  e.thirdVN = hash_operand(I->getOperand(2));
+  e.function = 0;
    e.type = I->getType();
    e.opcode = Expression::INSERT;
    
    e.type = I->getType();
    e.opcode = Expression::INSERT;
    
@@ -392,9 +448,10 @@ Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression ValueTable::create_expression(SelectInst* I) {
    Expression e;
      
  Expression ValueTable::create_expression(SelectInst* I) {
    Expression e;
      
-  e.firstVN = lookup_or_add(I->getCondition());
-  e.secondVN = lookup_or_add(I->getTrueValue());
-  e.thirdVN = lookup_or_add(I->getFalseValue());
+  e.firstVN = hash_operand(I->getCondition());
+  e.secondVN = hash_operand(I->getTrueValue());
+  e.thirdVN = hash_operand(I->getFalseValue());
+  e.function = 0;
    e.type = I->getType();
    e.opcode = Expression::SELECT;
    
    e.type = I->getType();
    e.opcode = Expression::SELECT;
    
@@ -404,15 +461,16 @@ Expression ValueTable::create_expression(SelectInst* I) {
  Expression ValueTable::create_expression(GetElementPtrInst* G) {
    Expression e;
      
  Expression ValueTable::create_expression(GetElementPtrInst* G) {
    Expression e;
      
-  e.firstVN = lookup_or_add(G->getPointerOperand());
+  e.firstVN = hash_operand(G->getPointerOperand());
    e.secondVN = 0;
    e.thirdVN = 0;
    e.secondVN = 0;
    e.thirdVN = 0;
+  e.function = 0;
    e.type = G->getType();
    e.opcode = Expression::GEP;
    
    for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
         I != E; ++I)
    e.type = G->getType();
    e.opcode = Expression::GEP;
    
    for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
         I != E; ++I)
-    e.varargs.push_back(lookup_or_add(*I));
+    e.varargs.push_back(hash_operand(*I));
    
    return e;
  }
    
    return e;
  }
@@ -428,8 +486,25 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
    if (VI != valueNumbering.end())
      return VI->second;
    
    if (VI != valueNumbering.end())
      return VI->second;
    
-  
-  if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
+  if (CallInst* C = dyn_cast<CallInst>(V)) {
+    if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory
+      Expression e = create_expression(C);
+    
+      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+      if (EI != expressionNumbering.end()) {
+        valueNumbering.insert(std::make_pair(V, EI->second));
+        return EI->second;
+      } else {
+        expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+        valueNumbering.insert(std::make_pair(V, nextValueNumber));
+      
+        return nextValueNumber++;
+      }
+    } else {
+      valueNumbering.insert(std::make_pair(V, nextValueNumber));
+      return nextValueNumber++;
+    }
+  } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
      Expression e = create_expression(BO);
      
      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
      Expression e = create_expression(BO);
      
      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
@@ -637,12 +712,20 @@ namespace {
      
      DenseMap<BasicBlock*, ValueNumberedSet> availableOut;
      
      
      DenseMap<BasicBlock*, ValueNumberedSet> availableOut;
      
+    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
+    PhiMapType phiMap;
+    
+    
      // This transformation requires dominator postdominator info
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesCFG();
        AU.addRequired<DominatorTree>();
        AU.addRequired<MemoryDependenceAnalysis>();
      // This transformation requires dominator postdominator info
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesCFG();
        AU.addRequired<DominatorTree>();
        AU.addRequired<MemoryDependenceAnalysis>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetData>();
+      AU.addPreserved<AliasAnalysis>();
        AU.addPreserved<MemoryDependenceAnalysis>();
        AU.addPreserved<MemoryDependenceAnalysis>();
+      AU.addPreserved<TargetData>();
      }
    
      // Helper fuctions
      }
    
      // Helper fuctions
@@ -656,11 +739,19 @@ namespace {
                              ValueNumberedSet& currAvail,
                              DenseMap<Value*, LoadInst*>& lastSeenLoad,
                              SmallVector<Instruction*, 4>& toErase);
                              ValueNumberedSet& currAvail,
                              DenseMap<Value*, LoadInst*>& lastSeenLoad,
                              SmallVector<Instruction*, 4>& toErase);
-    bool processNonLocalLoad(LoadInst* L, SmallVector<Instruction*, 4>& toErase);
+    bool processNonLocalLoad(LoadInst* L,
+                             SmallVector<Instruction*, 4>& toErase);
+    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+                       SmallVector<Instruction*, 4>& toErase);
+    bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+                                SmallVector<Instruction*, 4>& toErase);
      Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                              DenseMap<BasicBlock*, Value*> &Phis,
                              bool top_level = false);
      void dump(DenseMap<BasicBlock*, Value*>& d);
      Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                              DenseMap<BasicBlock*, Value*> &Phis,
                              bool top_level = false);
      void dump(DenseMap<BasicBlock*, Value*>& d);
+    bool iterateOnFunction(Function &F);
+    Value* CollapsePhi(PHINode* p);
+    bool isSafeReplacement(PHINode* p, Instruction* inst);
    };
    
    char GVN::ID = 0;
    };
    
    char GVN::ID = 0;
@@ -712,6 +803,35 @@ void GVN::dump(DenseMap<BasicBlock*, Value*>& d) {
    printf("}\n");
  }
  
    printf("}\n");
  }
  
+Value* GVN::CollapsePhi(PHINode* p) {
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+  Value* constVal = p->hasConstantValue();
+  
+  if (constVal) {
+    if (Instruction* inst = dyn_cast<Instruction>(constVal)) {
+      if (DT.dominates(inst, p))
+        if (isSafeReplacement(p, inst))
+          return inst;
+    } else {
+      return constVal;
+    }
+  }
+  
+  return 0;
+}
+
+bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
+  if (!isa<PHINode>(inst))
+    return true;
+  
+  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
+       UI != E; ++UI)
+    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
+      if (use_phi->getParent() == inst->getParent())
+        return false;
+  
+  return true;
+}
  
  /// GetValueForBlock - Get the value to use within the specified basic block.
  /// available values are in Phis.
  
  /// GetValueForBlock - Get the value to use within the specified basic block.
  /// available values are in Phis.
@@ -720,96 +840,121 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                                 bool top_level) { 
                                   
    // If we have already computed this value, return the previously computed val.
                                 bool top_level) { 
                                   
    // If we have already computed this value, return the previously computed val.
-  Value *&V = Phis[BB];
-  if (V && ! top_level) return V;
+  DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
+  if (V != Phis.end() && !top_level) return V->second;
    
    BasicBlock* singlePred = BB->getSinglePredecessor();
    
    BasicBlock* singlePred = BB->getSinglePredecessor();
-  if (singlePred)
-    return V = GetValueForBlock(singlePred, orig, Phis);
-  
+  if (singlePred) {
+    Value *ret = GetValueForBlock(singlePred, orig, Phis);
+    Phis[BB] = ret;
+    return ret;
+  }
    // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
    // now, then get values to fill in the incoming values for the PHI.
    PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle",
                              BB->begin());
    PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));
    // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
    // now, then get values to fill in the incoming values for the PHI.
    PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle",
                              BB->begin());
    PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));
-  V = PN;
    
    
-  bool all_same = true;
-  Value* first = 0;
+  if (Phis.count(BB) == 0)
+    Phis.insert(std::make_pair(BB, PN));
    
    // Fill in the incoming values for the block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      Value* val = GetValueForBlock(*PI, orig, Phis);
    
    // Fill in the incoming values for the block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      Value* val = GetValueForBlock(*PI, orig, Phis);
-    if (first == 0)
-      first = val;
-    else if (all_same && first != val)
-      all_same = false;
      
      PN->addIncoming(val, *PI);
    }
      
      PN->addIncoming(val, *PI);
    }
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  AA.copyValue(orig, PN);
    
    
-  if (all_same) {
+  // Attempt to collapse PHI nodes that are trivially redundant
+  Value* v = CollapsePhi(PN);
+  if (v) {
      MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
      MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-    
+
      MD.removeInstruction(PN);
      MD.removeInstruction(PN);
-    PN->replaceAllUsesWith(first);
-    
-    SmallVector<BasicBlock*, 4> toRemove;
+    PN->replaceAllUsesWith(v);
+
      for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
           E = Phis.end(); I != E; ++I)
        if (I->second == PN)
      for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
           E = Phis.end(); I != E; ++I)
        if (I->second == PN)
-        toRemove.push_back(I->first);
-    for (SmallVector<BasicBlock*, 4>::iterator I = toRemove.begin(),
-         E= toRemove.end(); I != E; ++I)
-      Phis[*I] = first;
-    
+        I->second = v;
+
      PN->eraseFromParent();
      PN->eraseFromParent();
-    
-    Phis[BB] = first;
-    
-    return first;
+
+    Phis[BB] = v;
+
+    return v;
    }
  
    }
  
+  // Cache our phi construction results
+  phiMap[orig->getPointerOperand()].insert(PN);
    return PN;
  }
  
    return PN;
  }
  
-bool GVN::processNonLocalLoad(LoadInst* L, SmallVector<Instruction*, 4>& toErase) {
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst* L,
+                              SmallVector<Instruction*, 4>& toErase) {
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    
+  // Find the non-local dependencies of the load
    DenseMap<BasicBlock*, Value*> deps;
    MD.getNonLocalDependency(L, deps);
    
    DenseMap<BasicBlock*, Value*> repl;
    DenseMap<BasicBlock*, Value*> deps;
    MD.getNonLocalDependency(L, deps);
    
    DenseMap<BasicBlock*, Value*> repl;
+  
+  // Filter out useless results (non-locals, etc)
    for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end();
         I != E; ++I)
      if (I->second == MemoryDependenceAnalysis::None) {
        return false;
      } else if (I->second == MemoryDependenceAnalysis::NonLocal) {
        continue;
    for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end();
         I != E; ++I)
      if (I->second == MemoryDependenceAnalysis::None) {
        return false;
      } else if (I->second == MemoryDependenceAnalysis::NonLocal) {
        continue;
-    }else if (StoreInst* S = dyn_cast<StoreInst>(I->second)) {
+    } else if (StoreInst* S = dyn_cast<StoreInst>(I->second)) {
        if (S->getPointerOperand() == L->getPointerOperand())
        if (S->getPointerOperand() == L->getPointerOperand())
-        repl.insert(std::make_pair(I->first, S->getOperand(0)));
+        repl[I->first] = S->getOperand(0);
        else
          return false;
      } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) {
        if (LD->getPointerOperand() == L->getPointerOperand())
        else
          return false;
      } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) {
        if (LD->getPointerOperand() == L->getPointerOperand())
-        repl.insert(std::make_pair(I->first, LD));
+        repl[I->first] = LD;
        else
          return false;
      } else {
        return false;
      }
    
        else
          return false;
      } else {
        return false;
      }
    
+  // Use cached PHI construction information from previous runs
+  SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()];
+  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+       I != E; ++I) {
+    if ((*I)->getParent() == L->getParent()) {
+      MD.removeInstruction(L);
+      L->replaceAllUsesWith(*I);
+      toErase.push_back(L);
+      NumGVNLoad++;
+      
+      return true;
+    } else {
+      repl.insert(std::make_pair((*I)->getParent(), *I));
+    }
+  }
+  
+  // Perform PHI construction
    SmallPtrSet<BasicBlock*, 4> visited;
    Value* v = GetValueForBlock(L->getParent(), L, repl, true);
    
    MD.removeInstruction(L);
    L->replaceAllUsesWith(v);
    toErase.push_back(L);
    SmallPtrSet<BasicBlock*, 4> visited;
    Value* v = GetValueForBlock(L->getParent(), L, repl, true);
    
    MD.removeInstruction(L);
    L->replaceAllUsesWith(v);
    toErase.push_back(L);
+  NumGVNLoad++;
  
    return true;
  }
  
  
    return true;
  }
  
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
  bool GVN::processLoad(LoadInst* L,
                           DenseMap<Value*, LoadInst*>& lastLoad,
                           SmallVector<Instruction*, 4>& toErase) {
  bool GVN::processLoad(LoadInst* L,
                           DenseMap<Value*, LoadInst*>& lastLoad,
                           SmallVector<Instruction*, 4>& toErase) {
@@ -823,12 +968,23 @@ bool GVN::processLoad(LoadInst* L,
    
    // ... to a pointer that has been loaded from before...
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    
    // ... to a pointer that has been loaded from before...
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  bool removedNonLocal = false;
    Instruction* dep = MD.getDependency(L);
    if (dep == MemoryDependenceAnalysis::NonLocal &&
    Instruction* dep = MD.getDependency(L);
    if (dep == MemoryDependenceAnalysis::NonLocal &&
-      L->getParent() != &L->getParent()->getParent()->getEntryBlock())
-    processNonLocalLoad(L, toErase);
+      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
+    removedNonLocal = processNonLocalLoad(L, toErase);
+    
+    if (!removedNonLocal)
+      last = L;
+    
+    return removedNonLocal;
+  }
+  
+  
    bool deletedLoad = false;
    
    bool deletedLoad = false;
    
+  // Walk up the dependency chain until we either find
+  // a dependency we can use, or we can't walk any further
    while (dep != MemoryDependenceAnalysis::None &&
           dep != MemoryDependenceAnalysis::NonLocal &&
           (isa<LoadInst>(dep) || isa<StoreInst>(dep))) {
    while (dep != MemoryDependenceAnalysis::None &&
           dep != MemoryDependenceAnalysis::NonLocal &&
           (isa<LoadInst>(dep) || isa<StoreInst>(dep))) {
@@ -865,14 +1021,193 @@ bool GVN::processLoad(LoadInst* L,
        dep = MD.getDependency(L, dep);
      }
    }
        dep = MD.getDependency(L, dep);
      }
    }
-  
+
+  if (dep != MemoryDependenceAnalysis::None &&
+      dep != MemoryDependenceAnalysis::NonLocal &&
+      isa<AllocationInst>(dep)) {
+    // Check that this load is actually from the
+    // allocation we found
+    Value* v = L->getOperand(0);
+    while (true) {
+      if (BitCastInst *BC = dyn_cast<BitCastInst>(v))
+        v = BC->getOperand(0);
+      else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v))
+        v = GEP->getOperand(0);
+      else
+        break;
+    }
+    if (v == dep) {
+      // If this load depends directly on an allocation, there isn't
+      // anything stored there; therefore, we can optimize this load
+      // to undef.
+      MD.removeInstruction(L);
+
+      L->replaceAllUsesWith(UndefValue::get(L->getType()));
+      toErase.push_back(L);
+      deletedLoad = true;
+      NumGVNLoad++;
+    }
+  }
+
    if (!deletedLoad)
      last = L;
    
    return deletedLoad;
  }
  
    if (!deletedLoad)
      last = L;
    
    return deletedLoad;
  }
  
-/// buildsets_availout - When calculating availability, handle an instruction
+/// isReturnSlotOptznProfitable - Determine if performing a return slot 
+/// fusion with the slot dest is profitable
+static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
+  // We currently consider it profitable if dest is otherwise dead.
+  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+  while (!useList.empty()) {
+    User* UI = useList.back();
+    
+    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+      useList.pop_back();
+      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+           I != E; ++I)
+        useList.push_back(*I);
+    } else if (UI == cpy)
+      useList.pop_back();
+    else
+      return false;
+  }
+  
+  return true;
+}
+
+/// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a return slot optimization by having
+/// the call write its result directly into the callees return parameter
+/// rather than using memcpy
+bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+                                 SmallVector<Instruction*, 4>& toErase) {
+  // Deliberately get the source and destination with bitcasts stripped away,
+  // because we'll need to do type comparisons based on the underlying type.
+  Value* cpyDest = cpy->getDest();
+  Value* cpySrc = cpy->getSource();
+  CallSite CS = CallSite::get(C);
+  
+  // Since this is a return slot optimization, we need to make sure that
+  // the value being copied is, in fact, in a return slot.  We also need to
+  // check that the return slot parameter is marked noalias, so that we can
+  // be sure that changing it will not cause unexpected behavior changes due
+  // to it being accessed through a global or another parameter.
+  if (CS.arg_size() == 0 ||
+      cpySrc != CS.getArgument(0) ||
+      !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet))
+    return false;
+  
+  // Check that something sneaky is not happening involving casting
+  // return slot types around.
+  if (CS.getArgument(0)->getType() != cpyDest->getType())
+    return false;
+  // sret --> pointer
+  const PointerType* PT = cast<PointerType>(cpyDest->getType()); 
+  
+  // We can only perform the transformation if the size of the memcpy
+  // is constant and equal to the size of the structure.
+  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+  if (!cpyLength)
+    return false;
+  
+  TargetData& TD = getAnalysis<TargetData>();
+  if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
+    return false;
+  
+  // We only perform the transformation if it will be profitable. 
+  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+    return false;
+  
+  // In addition to knowing that the call does not access the return slot
+  // in some unexpected manner, which we derive from the noalias attribute,
+  // we also need to know that it does not sneakily modify the destination
+  // slot in the caller.  We don't have parameter attributes to go by
+  // for this one, so we just rely on AA to figure it out for us.
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) !=
+      AliasAnalysis::NoModRef)
+    return false;
+  
+  // If all the checks have passed, then we're alright to do the transformation.
+  CS.setArgument(0, cpyDest);
+  
+  // Drop any cached information about the call, because we may have changed
+  // its dependence information by changing its parameter.
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  MD.dropInstruction(C);
+  
+  // Remove the memcpy
+  MD.removeInstruction(cpy);
+  toErase.push_back(cpy);
+  
+  return true;
+}
+
+/// processMemCpy - perform simplication of memcpy's.  If we have memcpy A which
+/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
+/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
+///  This allows later passes to remove the first memcpy altogether.
+bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+                        SmallVector<Instruction*, 4>& toErase) {
+  // We can only transforms memcpy's where the dest of one is the source of the
+  // other
+  if (M->getSource() != MDep->getDest())
+    return false;
+  
+  // Second, the length of the memcpy's must be the same, or the preceeding one
+  // must be larger than the following one.
+  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+  if (!C1 || !C2)
+    return false;
+  
+  uint64_t CpySize = C1->getValue().getZExtValue();
+  uint64_t DepSize = C2->getValue().getZExtValue();
+  
+  if (DepSize < CpySize)
+    return false;
+  
+  // Finally, we have to make sure that the dest of the second does not
+  // alias the source of the first
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+      AliasAnalysis::NoAlias)
+    return false;
+  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
+           AliasAnalysis::NoAlias)
+    return false;
+  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
+           != AliasAnalysis::NoAlias)
+    return false;
+  
+  // If all checks passed, then we can transform these memcpy's
+  Function* MemCpyFun = Intrinsic::getDeclaration(
+                                 M->getParent()->getParent()->getParent(),
+                                 M->getIntrinsicID());
+    
+  std::vector<Value*> args;
+  args.push_back(M->getRawDest());
+  args.push_back(MDep->getRawSource());
+  args.push_back(M->getLength());
+  args.push_back(M->getAlignment());
+  
+  CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);
+  
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  if (MD.getDependency(C) == MDep) {
+    MD.dropInstruction(M);
+    toErase.push_back(M);
+    return true;
+  } else {
+    MD.removeInstruction(C);
+    toErase.push_back(C);
+    return false;
+  }
+}
+
+/// processInstruction - When calculating availability, handle an instruction
  /// by inserting it into the appropriate sets
  bool GVN::processInstruction(Instruction* I,
                                  ValueNumberedSet& currAvail,
  /// by inserting it into the appropriate sets
  bool GVN::processInstruction(Instruction* I,
                                  ValueNumberedSet& currAvail,
@@ -880,13 +1215,62 @@ bool GVN::processInstruction(Instruction* I,
                                  SmallVector<Instruction*, 4>& toErase) {
    if (LoadInst* L = dyn_cast<LoadInst>(I)) {
      return processLoad(L, lastSeenLoad, toErase);
                                  SmallVector<Instruction*, 4>& toErase) {
    if (LoadInst* L = dyn_cast<LoadInst>(I)) {
      return processLoad(L, lastSeenLoad, toErase);
+  } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+    // The are two possible optimizations we can do for memcpy:
+    //   a) memcpy-memcpy xform which exposes redundance for DSE
+    //   b) call-memcpy xform for sret return slot optimization
+    Instruction* dep = MD.getDependency(M);
+    if (dep == MemoryDependenceAnalysis::None ||
+        dep == MemoryDependenceAnalysis::NonLocal)
+      return false;
+    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
+      return processMemCpy(M, MemCpy, toErase);
+    if (CallInst* C = dyn_cast<CallInst>(dep))
+      return performReturnSlotOptzn(M, C, toErase);
+    return false;
    }
    
    unsigned num = VN.lookup_or_add(I);
    
    }
    
    unsigned num = VN.lookup_or_add(I);
    
-  if (currAvail.test(num)) {
+  // Collapse PHI nodes
+  if (PHINode* p = dyn_cast<PHINode>(I)) {
+    Value* constVal = CollapsePhi(p);
+    
+    if (constVal) {
+      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
+           PI != PE; ++PI)
+        if (PI->second.count(p))
+          PI->second.erase(p);
+        
+      p->replaceAllUsesWith(constVal);
+      toErase.push_back(p);
+    }
+  // Perform value-number based elimination
+  } else if (currAvail.test(num)) {
      Value* repl = find_leader(currAvail, num);
      
      Value* repl = find_leader(currAvail, num);
      
+    if (CallInst* CI = dyn_cast<CallInst>(I)) {
+      AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+      if (!AA.doesNotAccessMemory(CI)) {
+        MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+        if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
+            MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
+          // There must be an intervening may-alias store, so nothing from
+          // this point on will be able to be replaced with the preceding call
+          currAvail.erase(repl);
+          currAvail.insert(I);
+          
+          return false;
+        }
+      }
+    }
+    
+    // Remove it!
+    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+    MD.removeInstruction(I);
+    
      VN.erase(I);
      I->replaceAllUsesWith(repl);
      toErase.push_back(I);
      VN.erase(I);
      I->replaceAllUsesWith(repl);
      toErase.push_back(I);
@@ -902,10 +1286,27 @@ bool GVN::processInstruction(Instruction* I,
  // GVN::runOnFunction - This is the main transformation entry point for a
  // function.
  //
  // GVN::runOnFunction - This is the main transformation entry point for a
  // function.
  //
-bool GVN::runOnFunction(Function &F) {
+bool GVN::runOnFunction(Function& F) {
+  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+  
+  bool changed = false;
+  bool shouldContinue = true;
+  
+  while (shouldContinue) {
+    shouldContinue = iterateOnFunction(F);
+    changed |= shouldContinue;
+  }
+  
+  return changed;
+}
+
+
+// GVN::iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
    // Clean out global sets from any previous functions
    VN.clear();
    availableOut.clear();
    // Clean out global sets from any previous functions
    VN.clear();
    availableOut.clear();
+  phiMap.clear();
   
    bool changed_function = false;
    
   
    bool changed_function = false;
    
@@ -929,17 +1330,19 @@ bool GVN::runOnFunction(Function &F) {
  
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
           BI != BE; ) {
  
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
           BI != BE; ) {
-      changed_function |= processInstruction(BI, currAvail, lastSeenLoad, toErase);
+      changed_function |= processInstruction(BI, currAvail,
+                                             lastSeenLoad, toErase);
        
        NumGVNInstr += toErase.size();
        
        // Avoid iterator invalidation
        ++BI;
        
        NumGVNInstr += toErase.size();
        
        // Avoid iterator invalidation
        ++BI;
-      
+
        for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
        for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
-           E = toErase.end(); I != E; ++I)
+           E = toErase.end(); I != E; ++I) {
          (*I)->eraseFromParent();
          (*I)->eraseFromParent();
-      
+      }
+
        toErase.clear();
      }
    }
        toErase.clear();
      }
    }