Check alignment of loads when deciding whether it is safe to execute them unconditionally

[oota-llvm.git] / lib / Transforms / Scalar / GVN.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 7454f62b388f4204837628d0322bfe1678ce4b1e..6e709523af38cbc92ed55d507e3e62193696b42f 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -20,6 +20,7 @@
  #include "llvm/BasicBlock.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
  #include "llvm/Function.h"
  #include "llvm/IntrinsicInst.h"
  #include "llvm/LLVMContext.h"
@@ -31,10 +32,12 @@
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
  #include "llvm/Support/CFG.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
@@ -46,7 +49,6 @@
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/SSAUpdater.h"
-#include <cstdio>
  using namespace llvm;
  
  STATISTIC(NumGVNInstr,  "Number of instructions deleted");
@@ -68,18 +70,44 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
  /// two values.
  namespace {
    struct Expression {
-    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
-                            UDIV, SDIV, FDIV, UREM, SREM,
-                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
-                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
-                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
-                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
-                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
-                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
-                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
-                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
-                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
-                            INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
+    enum ExpressionOpcode { 
+      ADD = Instruction::Add,
+      FADD = Instruction::FAdd,
+      SUB = Instruction::Sub,
+      FSUB = Instruction::FSub,
+      MUL = Instruction::Mul,
+      FMUL = Instruction::FMul,
+      UDIV = Instruction::UDiv,
+      SDIV = Instruction::SDiv,
+      FDIV = Instruction::FDiv,
+      UREM = Instruction::URem,
+      SREM = Instruction::SRem,
+      FREM = Instruction::FRem,
+      SHL = Instruction::Shl,
+      LSHR = Instruction::LShr,
+      ASHR = Instruction::AShr,
+      AND = Instruction::And,
+      OR = Instruction::Or,
+      XOR = Instruction::Xor,
+      TRUNC = Instruction::Trunc,
+      ZEXT = Instruction::ZExt,
+      SEXT = Instruction::SExt,
+      FPTOUI = Instruction::FPToUI,
+      FPTOSI = Instruction::FPToSI,
+      UITOFP = Instruction::UIToFP,
+      SITOFP = Instruction::SIToFP,
+      FPTRUNC = Instruction::FPTrunc,
+      FPEXT = Instruction::FPExt,
+      PTRTOINT = Instruction::PtrToInt,
+      INTTOPTR = Instruction::IntToPtr,
+      BITCAST = Instruction::BitCast,
+      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
+      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
+      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
  
      ExpressionOpcode opcode;
      const Type* type;
@@ -125,9 +153,7 @@ namespace {
  
        uint32_t nextValueNumber;
  
-      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
        Expression::ExpressionOpcode getOpcode(CmpInst* C);
-      Expression::ExpressionOpcode getOpcode(CastInst* C);
        Expression create_expression(BinaryOperator* BO);
        Expression create_expression(CmpInst* C);
        Expression create_expression(ShuffleVectorInst* V);
@@ -188,37 +214,16 @@ template <> struct DenseMapInfo<Expression> {
    static bool isEqual(const Expression &LHS, const Expression &RHS) {
      return LHS == RHS;
    }
-  static bool isPod() { return true; }
  };
+  
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
  }
  
  //===----------------------------------------------------------------------===//
  //                     ValueTable Internal Functions
  //===----------------------------------------------------------------------===//
-Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
-  switch(BO->getOpcode()) {
-  default: // THIS SHOULD NEVER HAPPEN
-    llvm_unreachable("Binary operator with unknown opcode?");
-  case Instruction::Add:  return Expression::ADD;
-  case Instruction::FAdd: return Expression::FADD;
-  case Instruction::Sub:  return Expression::SUB;
-  case Instruction::FSub: return Expression::FSUB;
-  case Instruction::Mul:  return Expression::MUL;
-  case Instruction::FMul: return Expression::FMUL;
-  case Instruction::UDiv: return Expression::UDIV;
-  case Instruction::SDiv: return Expression::SDIV;
-  case Instruction::FDiv: return Expression::FDIV;
-  case Instruction::URem: return Expression::UREM;
-  case Instruction::SRem: return Expression::SREM;
-  case Instruction::FRem: return Expression::FREM;
-  case Instruction::Shl:  return Expression::SHL;
-  case Instruction::LShr: return Expression::LSHR;
-  case Instruction::AShr: return Expression::ASHR;
-  case Instruction::And:  return Expression::AND;
-  case Instruction::Or:   return Expression::OR;
-  case Instruction::Xor:  return Expression::XOR;
-  }
-}
  
  Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
    if (isa<ICmpInst>(C)) {
@@ -258,25 +263,6 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
    }
  }
  
-Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
-  switch(C->getOpcode()) {
-  default: // THIS SHOULD NEVER HAPPEN
-    llvm_unreachable("Cast operator with unknown opcode?");
-  case Instruction::Trunc:    return Expression::TRUNC;
-  case Instruction::ZExt:     return Expression::ZEXT;
-  case Instruction::SExt:     return Expression::SEXT;
-  case Instruction::FPToUI:   return Expression::FPTOUI;
-  case Instruction::FPToSI:   return Expression::FPTOSI;
-  case Instruction::UIToFP:   return Expression::UITOFP;
-  case Instruction::SIToFP:   return Expression::SITOFP;
-  case Instruction::FPTrunc:  return Expression::FPTRUNC;
-  case Instruction::FPExt:    return Expression::FPEXT;
-  case Instruction::PtrToInt: return Expression::PTRTOINT;
-  case Instruction::IntToPtr: return Expression::INTTOPTR;
-  case Instruction::BitCast:  return Expression::BITCAST;
-  }
-}
-
  Expression ValueTable::create_expression(CallInst* C) {
    Expression e;
  
@@ -297,7 +283,7 @@ Expression ValueTable::create_expression(BinaryOperator* BO) {
    e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
    e.function = 0;
    e.type = BO->getType();
-  e.opcode = getOpcode(BO);
+  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
  
    return e;
  }
@@ -320,7 +306,7 @@ Expression ValueTable::create_expression(CastInst* C) {
    e.varargs.push_back(lookup_or_add(C->getOperand(0)));
    e.function = 0;
    e.type = C->getType();
-  e.opcode = getOpcode(C);
+  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
  
    return e;
  }
@@ -489,21 +475,21 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
      // Check to see if we have a single dominating call instruction that is
      // identical to C.
      for (unsigned i = 0, e = deps.size(); i != e; ++i) {
-      const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+      const NonLocalDepEntry *I = &deps[i];
        // Ignore non-local dependencies.
-      if (I->second.isNonLocal())
+      if (I->getResult().isNonLocal())
          continue;
  
        // We don't handle non-depedencies.  If we already have a call, reject
        // instruction dependencies.
-      if (I->second.isClobber() || cdep != 0) {
+      if (I->getResult().isClobber() || cdep != 0) {
          cdep = 0;
          break;
        }
  
-      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
        // FIXME: All duplicated with non-local case.
-      if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
          cdep = NonLocalDepCall;
          continue;
        }
@@ -728,13 +714,13 @@ static RegisterPass<GVN> X("gvn",
                             "Global Value Numbering");
  
  void GVN::dump(DenseMap<uint32_t, Value*>& d) {
-  printf("{\n");
+  errs() << "{\n";
    for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
         E = d.end(); I != E; ++I) {
-      printf("%d\n", I->first);
+      errs() << I->first << "\n";
        I->second->dump();
    }
-  printf("}\n");
+  errs() << "}\n";
  }
  
  static bool isSafeReplacement(PHINode* p, Instruction *inst) {
@@ -824,7 +810,7 @@ SpeculationFailure:
    SmallVector<BasicBlock*, 32> BBWorklist;
    BBWorklist.push_back(BB);
  
-  while (!BBWorklist.empty()) {
+  do {
      BasicBlock *Entry = BBWorklist.pop_back_val();
      // Note that this sets blocks to 0 (unavailable) if they happen to not
      // already be in FullyAvailableBlocks.  This is safe.
@@ -836,7 +822,7 @@ SpeculationFailure:
  
      for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
        BBWorklist.push_back(*I);
-  }
+  } while (!BBWorklist.empty());
  
    return false;
  }
@@ -996,18 +982,19 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
  /// Check this case to see if there is anything more we can do before we give
  /// up.  This returns -1 if we have to give up, or a byte number in the stored
  /// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+                                          Value *WritePtr,
                                            uint64_t WriteSizeInBits,
                                            const TargetData &TD) {
    // If the loaded or stored value is an first class array or struct, don't try
    // to transform them.  We need to be able to bitcast to integer.
-  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()))
+  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
      return -1;
    
    int64_t StoreOffset = 0, LoadOffset = 0;
    Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
    Value *LoadBase = 
-    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
    if (StoreBase != LoadBase)
      return -1;
    
@@ -1016,14 +1003,12 @@ static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
    // FIXME: Study to see if/when this happens.
    if (LoadOffset == StoreOffset) {
  #if 0
-    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
      << "Base       = " << *StoreBase << "\n"
      << "Store Ptr  = " << *WritePtr << "\n"
      << "Store Offs = " << StoreOffset << "\n"
-    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
-    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
-    errs() << "'" << L->getParent()->getParent()->getName() << "'"
-    << *L->getParent();
+    << "Load Ptr   = " << *LoadPtr << "\n";
+    abort();
  #endif
      return -1;
    }
@@ -1033,7 +1018,7 @@ static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
    // must have gotten confused.
    // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
    // remove this check, as it is duplicated with what we have below.
-  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
    
    if ((WriteSizeInBits & 7) | (LoadSize & 7))
      return -1;
@@ -1049,14 +1034,12 @@ static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
    }
    if (isAAFailure) {
  #if 0
-    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
      << "Base       = " << *StoreBase << "\n"
      << "Store Ptr  = " << *WritePtr << "\n"
      << "Store Offs = " << StoreOffset << "\n"
-    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
-    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
-    errs() << "'" << L->getParent()->getParent()->getName() << "'"
-    << *L->getParent();
+    << "Load Ptr   = " << *LoadPtr << "\n";
+    abort();
  #endif
      return -1;
    }
@@ -1076,7 +1059,8 @@ static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
  
  /// AnalyzeLoadFromClobberingStore - This function is called when we have a
  /// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+                                          StoreInst *DepSI,
                                            const TargetData &TD) {
    // Cannot handle reading from store of first-class aggregate yet.
    if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
@@ -1084,21 +1068,52 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
      return -1;
  
    Value *StorePtr = DepSI->getPointerOperand();
-  uint64_t StoreSize = TD.getTypeSizeInBits(StorePtr->getType());
-  return AnalyzeLoadFromClobberingWrite(L, StorePtr, StoreSize, TD);
+  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+                                        StorePtr, StoreSize, TD);
  }
  
-static int AnalyzeLoadFromClobberingMemInst(LoadInst *L, MemIntrinsic *MI,
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+                                            MemIntrinsic *MI,
                                              const TargetData &TD) {
    // If the mem operation is a non-constant size, we can't handle it.
    ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
    if (SizeCst == 0) return -1;
    uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
-  
+
+  // If this is memset, we just need to see if the offset is valid in the size
+  // of the memset..
    if (MI->getIntrinsicID() == Intrinsic::memset)
-    return AnalyzeLoadFromClobberingWrite(L, MI->getDest(), MemSizeInBits, TD);
+    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                          MemSizeInBits, TD);
+  
+  // If we have a memcpy/memmove, the only case we can handle is if this is a
+  // copy from constant memory.  In that case, we can read directly from the
+  // constant memory.
+  MemTransferInst *MTI = cast<MemTransferInst>(MI);
+  
+  Constant *Src = dyn_cast<Constant>(MTI->getSource());
+  if (Src == 0) return -1;
    
-  // Unhandled memcpy/memmove.
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+  if (GV == 0 || !GV->isConstant()) return -1;
+  
+  // See if the access is within the bounds of the transfer.
+  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+                                              MI->getDest(), MemSizeInBits, TD);
+  if (Offset == -1)
+    return Offset;
+  
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src = ConstantExpr::getBitCast(Src,
+                                 llvm::Type::getInt8PtrTy(Src->getContext()));
+  Constant *OffsetCst = 
+    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+  if (ConstantFoldLoadFromConstPtr(Src, &TD))
+    return Offset;
    return -1;
  }
                                              
@@ -1116,14 +1131,15 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
    uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
    uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
    
+  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
    
    // Compute which bits of the stored value are being used by the load.  Convert
    // to an integer type to start with.
    if (isa<PointerType>(SrcVal->getType()))
-    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
+    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
    if (!isa<IntegerType>(SrcVal->getType()))
-    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
-                             "tmp", InsertPt);
+    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+                                   "tmp");
    
    // Shift the bits to the least significant depending on endianness.
    unsigned ShiftAmt;
@@ -1133,12 +1149,11 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
      ShiftAmt = (StoreSize-LoadSize-Offset)*8;
    
    if (ShiftAmt)
-    SrcVal = BinaryOperator::CreateLShr(SrcVal,
-                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
    
    if (LoadSize != StoreSize)
-    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
-                           "tmp", InsertPt);
+    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+                                 "tmp");
    
    return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
  }
@@ -1182,9 +1197,20 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
      
      return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
    }
-  
-  // ABORT;
-  return 0;
+ 
+  // Otherwise, this is a memcpy/memmove from a constant global.
+  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+  Constant *Src = cast<Constant>(MTI->getSource());
+
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src = ConstantExpr::getBitCast(Src,
+                                 llvm::Type::getInt8PtrTy(Src->getContext()));
+  Constant *OffsetCst = 
+  ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+  return ConstantFoldLoadFromConstPtr(Src, &TD);
  }
  
  
@@ -1233,6 +1259,32 @@ struct AvailableValueInBlock {
      assert(!isSimpleValue() && "Wrong accessor");
      return cast<MemIntrinsic>(Val.getPointer());
    }
+  
+  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+  /// defined here to the specified type.  This handles various coercion cases.
+  Value *MaterializeAdjustedValue(const Type *LoadTy,
+                                  const TargetData *TD) const {
+    Value *Res;
+    if (isSimpleValue()) {
+      Res = getSimpleValue();
+      if (Res->getType() != LoadTy) {
+        assert(TD && "Need target data to handle type mismatch case");
+        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+                                   *TD);
+        
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+                     << *getSimpleValue() << '\n'
+                     << *Res << '\n' << "\n\n\n");
+      }
+    } else {
+      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+                                   LoadTy, BB->getTerminator(), *TD);
+      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+                   << "  " << *getMemIntrinValue() << '\n'
+                   << *Res << '\n' << "\n\n\n");
+    }
+    return Res;
+  }
  };
  
  /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1241,7 +1293,15 @@ struct AvailableValueInBlock {
  static Value *ConstructSSAForLoadSet(LoadInst *LI, 
                           SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                       const TargetData *TD,
+                                     const DominatorTree &DT,
                                       AliasAnalysis *AA) {
+  // Check for the fully redundant, dominating load case.  In this case, we can
+  // just use the dominating value directly.
+  if (ValuesPerBlock.size() == 1 && 
+      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
+    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+
+  // Otherwise, we have to construct SSA form.
    SmallVector<PHINode*, 8> NewPHIs;
    SSAUpdater SSAUpdate(&NewPHIs);
    SSAUpdate.Initialize(LI);
@@ -1255,28 +1315,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
      if (SSAUpdate.HasValueForBlock(BB))
        continue;
  
-    unsigned Offset = AV.Offset;
-
-    Value *AvailableVal;
-    if (AV.isSimpleValue()) {
-      AvailableVal = AV.getSimpleValue();
-      if (AvailableVal->getType() != LoadTy) {
-        assert(TD && "Need target data to handle type mismatch case");
-        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
-                                            BB->getTerminator(), *TD);
-        
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
-              << *AV.getSimpleValue() << '\n'
-              << *AvailableVal << '\n' << "\n\n\n");
-      }
-    } else {
-      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
-                                            LoadTy, BB->getTerminator(), *TD);
-      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
-            << "  " << *AV.getMemIntrinValue() << '\n'
-            << *AvailableVal << '\n' << "\n\n\n");
-    }
-    SSAUpdate.AddAvailableValue(BB, AvailableVal);
+    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
    }
    
    // Perform PHI construction.
@@ -1301,10 +1340,10 @@ static bool isLifetimeStart(Instruction *Inst) {
  bool GVN::processNonLocalLoad(LoadInst *LI,
                                SmallVectorImpl<Instruction*> &toErase) {
    // Find the non-local dependencies of the load.
-  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+  SmallVector<NonLocalDepResult, 64> Deps;
    MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                     Deps);
-  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
+  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
    //             << Deps.size() << *LI << '\n');
  
    // If we had to process more than one hundred blocks to find the
@@ -1315,11 +1354,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
  
    // If we had a phi translation failure, we'll have a single entry which is a
    // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
      DEBUG(
-      errs() << "GVN: non-local load ";
-      WriteAsOperand(errs(), LI);
-      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
+      dbgs() << "GVN: non-local load ";
+      WriteAsOperand(dbgs(), LI);
+      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
      );
      return false;
    }
@@ -1334,18 +1373,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    const TargetData *TD = 0;
    
    for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
-    BasicBlock *DepBB = Deps[i].first;
-    MemDepResult DepInfo = Deps[i].second;
+    BasicBlock *DepBB = Deps[i].getBB();
+    MemDepResult DepInfo = Deps[i].getResult();
  
      if (DepInfo.isClobber()) {
+      // The address being loaded in this non-local block may not be the same as
+      // the pointer operand of the load if PHI translation occurs.  Make sure
+      // to consider the right address.
+      Value *Address = Deps[i].getAddress();
+      
        // If the dependence is to a store that writes to a superset of the bits
        // read by the load, we can extract the bits we need for the load from the
        // stored value.
        if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
          if (TD == 0)
            TD = getAnalysisIfAvailable<TargetData>();
-        if (TD) {
-          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+        if (TD && Address) {
+          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+                                                      DepSI, *TD);
            if (Offset != -1) {
              ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                             DepSI->getOperand(0),
@@ -1360,8 +1405,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
        if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
          if (TD == 0)
            TD = getAnalysisIfAvailable<TargetData>();
-        if (TD) {
-          int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
+        if (TD && Address) {
+          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+                                                        DepMI, *TD);
            if (Offset != -1) {
              ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                    Offset));
@@ -1435,10 +1481,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    // load, then it is fully redundant and we can use PHI insertion to compute
    // its value.  Insert PHIs and remove the fully redundant value now.
    if (UnavailableBlocks.empty()) {
-    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
      
      // Perform PHI construction.
-    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                        VN.getAliasAnalysis());
      LI->replaceAllUsesWith(V);
  
@@ -1476,8 +1522,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    while (TmpBB->getSinglePredecessor()) {
      isSinglePred = true;
      TmpBB = TmpBB->getSinglePredecessor();
-    if (!TmpBB) // If haven't found any, bail now.
-      return false;
      if (TmpBB == LoadBB) // Infinite (unreachable) loop.
        return false;
      if (Blockers.count(TmpBB))
@@ -1549,7 +1593,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
  
    // We don't currently handle critical edges :(
    if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
-    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+    DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
                   << UnavailablePred->getName() << "': " << *LI << '\n');
      return false;
    }
@@ -1557,39 +1601,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
    //
-  // FIXME: This may insert a computation, but we don't tell scalar GVN
-  // optimization stuff about it.  How do we do this?
    SmallVector<Instruction*, 8> NewInsts;
-  Value *LoadPtr = 0;
    
    // If all preds have a single successor, then we know it is safe to insert the
    // load on the pred (?!?), so we can insert code to materialize the pointer if
    // it is not available.
+  PHITransAddr Address(LI->getOperand(0), TD);
+  Value *LoadPtr = 0;
    if (allSingleSucc) {
-    LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
-                                             UnavailablePred, TD, *DT,NewInsts);
+    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+                                                *DT, NewInsts);
    } else {
-    LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
-                                                 UnavailablePred, TD, *DT);
+    Address.PHITranslateValue(LoadBB, UnavailablePred);
+    LoadPtr = Address.getAddr();
+    
+    // Make sure the value is live in the predecessor.
+    if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
+      if (!DT->dominates(Inst->getParent(), UnavailablePred))
+        LoadPtr = 0;
    }
  
-  // Assign value numbers to these new instructions.
-  for (SmallVector<Instruction*, 8>::iterator NI = NewInsts.begin(),
-       NE = NewInsts.end(); NI != NE; ++NI) {
-    // FIXME: We really _ought_ to insert these value numbers into their 
-    // parent's availability map.  However, in doing so, we risk getting into
-    // ordering issues.  If a block hasn't been processed yet, we would be
-    // marking a value as AVAIL-IN, which isn't what we intend.
-    VN.lookup_or_add(*NI);
-  }
-    
    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (LoadPtr == 0) {
-    DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+    assert(NewInsts.empty() && "Shouldn't insert insts on failure");
+    DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                   << *LI->getOperand(0) << "\n");
      return false;
    }
+
+  // Assign value numbers to these new instructions.
+  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+    // FIXME: We really _ought_ to insert these value numbers into their 
+    // parent's availability map.  However, in doing so, we risk getting into
+    // ordering issues.  If a block hasn't been processed yet, we would be
+    // marking a value as AVAIL-IN, which isn't what we intend.
+    VN.lookup_or_add(NewInsts[i]);
+  }
    
    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
@@ -1602,7 +1650,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUTE THIS.
-      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) {
+      !isSafeToLoadUnconditionally(LoadPtr,
+                                   UnavailablePred->getTerminator(),
+                                   LI->getAlignment(), TD)) {
      assert(NewInsts.empty() && "Should not have inserted instructions");
      return false;
    }
@@ -1610,9 +1660,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    // Okay, we can eliminate this load by inserting a reload in the predecessor
    // and using PHI construction to get the value in the other predecessors, do
    // it.
-  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
    DEBUG(if (!NewInsts.empty())
-          errs() << "INSERTED " << NewInsts.size() << " INSTS: "
+          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                   << *NewInsts.back() << '\n');
    
    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
@@ -1623,7 +1673,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));
  
    // Perform PHI construction.
-  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);
    if (isa<PHINode>(V))
@@ -1662,7 +1712,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
      Value *AvailVal = 0;
      if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
        if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
-        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
+        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+                                                    L->getPointerOperand(),
+                                                    DepSI, *TD);
          if (Offset != -1)
            AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                            L->getType(), L, *TD);
@@ -1672,14 +1724,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
      // a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
        if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
-        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
+        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+                                                      L->getPointerOperand(),
+                                                      DepMI, *TD);
          if (Offset != -1)
            AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
        }
      }
          
      if (AvailVal) {
-      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
              << *AvailVal << '\n' << *L << "\n\n\n");
        
        // Replace the load!
@@ -1693,10 +1747,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
          
      DEBUG(
        // fast print dep, using operator<< on instruction would be too slow
-      errs() << "GVN: load ";
-      WriteAsOperand(errs(), L);
+      dbgs() << "GVN: load ";
+      WriteAsOperand(dbgs(), L);
        Instruction *I = Dep.getInst();
-      errs() << " is clobbered by " << *I << '\n';
+      dbgs() << " is clobbered by " << *I << '\n';
      );
      return false;
    }
@@ -1720,7 +1774,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
          if (StoredVal == 0)
            return false;
          
-        DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                       << '\n' << *L << "\n\n\n");
        }
        else 
@@ -1749,7 +1803,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
          if (AvailableVal == 0)
            return false;
        
-        DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                       << "\n" << *L << "\n\n\n");
        }
        else 
@@ -1917,7 +1971,7 @@ bool GVN::runOnFunction(Function& F) {
    unsigned Iteration = 0;
  
    while (ShouldContinue) {
-    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
+    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
      ShouldContinue = iterateOnFunction(F);
      Changed |= ShouldContinue;
      ++Iteration;
@@ -1965,7 +2019,7 @@ bool GVN::processBlock(BasicBlock *BB) {
  
      for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
           E = toErase.end(); I != E; ++I) {
-      DEBUG(errs() << "GVN removed: " << **I << '\n');
+      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
        if (MD) MD->removeInstruction(*I);
        (*I)->eraseFromParent();
        DEBUG(verifyRemoved(*I));
@@ -2123,7 +2177,7 @@ bool GVN::performPRE(Function &F) {
          MD->invalidateCachedPointerInfo(Phi);
        VN.erase(CurInst);
  
-      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
+      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
        if (MD) MD->removeInstruction(CurInst);
        CurInst->eraseFromParent();
        DEBUG(verifyRemoved(CurInst));