Make DataLayout a plain object, not a pass.

[oota-llvm.git] / lib / Transforms / InstCombine / InstructionCombining.cpp
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp

index 7f8c3ae55812914d8b4bfc8e177820b9b84eedba..f62e9395f0c562c45b4977601e5dfd9ea94834a0 100644 (file)
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -36,22 +36,23 @@
  #define DEBUG_TYPE "instcombine"
  #include "llvm/Transforms/Scalar.h"
  #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
  #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/PatternMatch.h"
  #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm-c/Initialization.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
  #include <algorithm>
  #include <climits>
  using namespace llvm;
@@ -65,6 +66,11 @@ STATISTIC(NumExpand,    "Number of expansions");
  STATISTIC(NumFactor   , "Number of factorizations");
  STATISTIC(NumReassoc  , "Number of reassociations");
  
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+                                   cl::init(false),
+                                   cl::desc("Enable unsafe double to float "
+                                            "shrinking for math lib calls"));
+
  // Initialization Routines
  void llvm::initializeInstCombine(PassRegistry &Registry) {
    initializeInstCombinerPass(Registry);
@@ -97,13 +103,13 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
  bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
    assert(From->isIntegerTy() && To->isIntegerTy());
  
-  // If we don't have TD, we don't know if the source/dest are legal.
-  if (!TD) return false;
+  // If we don't have DL, we don't know if the source/dest are legal.
+  if (!DL) return false;
  
    unsigned FromWidth = From->getPrimitiveSizeInBits();
    unsigned ToWidth = To->getPrimitiveSizeInBits();
-  bool FromLegal = TD->isLegalInteger(FromWidth);
-  bool ToLegal = TD->isLegalInteger(ToWidth);
+  bool FromLegal = DL->isLegalInteger(FromWidth);
+  bool ToLegal = DL->isLegalInteger(ToWidth);
  
    // If this is a legal integer from type, and the result would be an illegal
    // type, don't do the transformation.
@@ -156,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
    return !Overflow;
  }
  
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. For floating-point operators the fast-math flags are saved
+/// first and restored afterwards; every other optional flag is dropped.
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+  if (!FPMO) {
+    I.clearSubclassOptionalData();
+    return;
+  }
+
+  FastMathFlags FMF = I.getFastMathFlags();
+  I.clearSubclassOptionalData();
+  I.setFastMathFlags(FMF);
+}
+
  /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
  /// operators which are associative or commutative:
  //
@@ -200,7 +221,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
          Value *C = I.getOperand(1);
  
          // Does "B op C" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+        if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) {
            // It simplifies to V.  Form "A op V".
            I.setOperand(0, A);
            I.setOperand(1, V);
@@ -213,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
              I.clearSubclassOptionalData();
              I.setHasNoSignedWrap(true);
            } else {
-            I.clearSubclassOptionalData();
+            ClearSubclassDataAfterReassociation(I);
            }
  
            Changed = true;
@@ -229,13 +250,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
          Value *C = Op1->getOperand(1);
  
          // Does "A op B" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+        if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) {
            // It simplifies to V.  Form "V op C".
            I.setOperand(0, V);
            I.setOperand(1, C);
            // Conservatively clear the optional flags, since they may not be
            // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
            Changed = true;
            ++NumReassoc;
            continue;
@@ -251,13 +272,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
          Value *C = I.getOperand(1);
  
          // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
            // It simplifies to V.  Form "V op B".
            I.setOperand(0, V);
            I.setOperand(1, B);
            // Conservatively clear the optional flags, since they may not be
            // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
            Changed = true;
            ++NumReassoc;
            continue;
@@ -271,13 +292,13 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
          Value *C = Op1->getOperand(1);
  
          // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
            // It simplifies to V.  Form "B op V".
            I.setOperand(0, B);
            I.setOperand(1, V);
            // Conservatively clear the optional flags, since they may not be
            // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
            Changed = true;
            ++NumReassoc;
            continue;
@@ -298,13 +319,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  
          Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
          BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
+        if (isa<FPMathOperator>(New)) {
+          FastMathFlags Flags = I.getFastMathFlags();
+          Flags &= Op0->getFastMathFlags();
+          Flags &= Op1->getFastMathFlags();
+          New->setFastMathFlags(Flags);
+        }
          InsertNewInstWith(New, I);
          New->takeName(Op1);
          I.setOperand(0, New);
          I.setOperand(1, Folded);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
-        I.clearSubclassOptionalData();
+        ClearSubclassDataAfterReassociation(I);
  
          Changed = true;
          continue;
@@ -398,7 +425,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
            std::swap(C, D);
          // Consider forming "A op' (B op D)".
          // If "B op D" simplifies then it can be formed with no cost.
-        Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+        Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
          // If "B op D" doesn't simplify then only go on if both of the existing
          // operations "A op' B" and "C op' D" will be zapped as no longer used.
          if (!V && Op0->hasOneUse() && Op1->hasOneUse())
@@ -420,7 +447,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
            std::swap(C, D);
          // Consider forming "(A op C) op' B".
          // If "A op C" simplifies then it can be formed with no cost.
-        Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+        Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
          // If "A op C" doesn't simplify then only go on if both of the existing
          // operations "A op' B" and "C op' D" will be zapped as no longer used.
          if (!V && Op0->hasOneUse() && Op1->hasOneUse())
@@ -442,8 +469,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
      Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
  
      // Do "A op C" and "B op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
          // They do! Return "L op' R".
          ++NumExpand;
          // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
@@ -451,7 +478,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
              (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
            return Op0;
          // Otherwise return "L op' R" if it simplifies.
-        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
            return V;
          // Otherwise, create a new instruction.
          C = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -467,8 +494,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
      Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
  
      // Do "A op B" and "A op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
          // They do! Return "L op' R".
          ++NumExpand;
          // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
@@ -476,7 +503,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
              (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
            return Op1;
          // Otherwise return "L op' R" if it simplifies.
-        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
            return V;
          // Otherwise, create a new instruction.
          A = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -510,8 +537,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
  // instruction if the LHS is a constant negative zero (which is the 'negate'
  // form).
  //
-Value *InstCombiner::dyn_castFNegVal(Value *V) const {
-  if (BinaryOperator::isFNeg(V))
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+  if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
      return BinaryOperator::getFNegArgument(V);
  
    // Constants can be considered to be negated values if they can be folded.
@@ -545,9 +572,14 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
    if (!ConstIsRHS)
      std::swap(Op0, Op1);
  
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
-    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) {
+    Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
                                      SO->getName()+".op");
+    Instruction *FPInst = dyn_cast<Instruction>(RI);
+    if (FPInst && isa<FPMathOperator>(FPInst))
+      FPInst->copyFastMathFlags(BO);
+    return RI;
+  }
    if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
      return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
                                     SO->getName()+".cmp");
@@ -678,7 +710,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
        Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
        Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
        Value *InV = 0;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+      // Beware of ConstantExpr:  it may eventually evaluate to getNullValue,
+      // even if currently isNullValue gives false.
+      Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+      if (InC && !isa<ConstantExpr>(InC))
          InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
        else
          InV = Builder->CreateSelect(PN->getIncomingValue(i),
@@ -734,21 +769,27 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    return ReplaceInstUsesWith(I, NewPN);
  }
  
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset.  If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
-                                          SmallVectorImpl<Value*> &NewIndices) {
-  if (!TD) return 0;
-  if (!Ty->isSized()) return 0;
+/// FindElementAtOffset - Given a pointer type and a constant offset, determine
+/// whether or not there is a sequence of GEP indices into the pointed type that
+/// will land us at the specified offset.  If so, fill them into NewIndices and
+/// return the resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+                                        SmallVectorImpl<Value*> &NewIndices) {
+  assert(PtrTy->isPtrOrPtrVectorTy());
+
+  if (!DL)
+    return 0;
+
+  Type *Ty = PtrTy->getPointerElementType();
+  if (!Ty->isSized())
+    return 0;
  
    // Start with the index over the outer type.  Note that the type size
    // might be zero (even if the offset isn't zero) if the indexed type
    // is something like [0 x {int, int}]
-  Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+  Type *IntPtrTy = DL->getIntPtrType(PtrTy);
    int64_t FirstIdx = 0;
-  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+  if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
      FirstIdx = Offset/TySize;
      Offset -= FirstIdx*TySize;
  
@@ -766,11 +807,11 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
    // Index into the types.  If we fail, set OrigBase to null.
    while (Offset) {
      // Indexing into tail padding between struct/array elements.
-    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+    if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
        return 0;
  
      if (StructType *STy = dyn_cast<StructType>(Ty)) {
-      const StructLayout *SL = TD->getStructLayout(STy);
+      const StructLayout *SL = DL->getStructLayout(STy);
        assert(Offset < (int64_t)SL->getSizeInBytes() &&
               "Offset must stay within the indexed type");
  
@@ -781,7 +822,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
        Offset -= SL->getElementOffset(Elt);
        Ty = STy->getElementType(Elt);
      } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+      uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
        assert(EltSize && "Cannot index into a zero-sized array");
        NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
        Offset %= EltSize;
@@ -1046,16 +1087,16 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
  
-  if (Value *V = SimplifyGEPInst(Ops, TD))
+  if (Value *V = SimplifyGEPInst(Ops, DL))
      return ReplaceInstUsesWith(GEP, V);
  
    Value *PtrOp = GEP.getOperand(0);
  
    // Eliminate unneeded casts for indices, and replace indices which displace
    // by multiples of a zero size type with zero.
-  if (TD) {
+  if (DL) {
      bool MadeChange = false;
-    Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+    Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
  
      gep_type_iterator GTI = gep_type_begin(GEP);
      for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -1067,14 +1108,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        // If the element type has zero size then any index over it is equivalent
        // to an index of zero, so replace it with zero if it is not zero already.
        if (SeqTy->getElementType()->isSized() &&
-          TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+          DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
          if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
            *I = Constant::getNullValue(IntPtrTy);
            MadeChange = true;
          }
  
        Type *IndexTy = (*I)->getType();
-      if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
+      if (IndexTy != IntPtrTy) {
          // If we are using a wider index than needed for this platform, shrink
          // it to what we need.  If narrower, sign-extend it to what we need.
          // This explicit cast can make subsequent optimizations more obvious.
@@ -1155,6 +1196,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
          GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
    }
  
+  // Canonicalize (gep i8* X, -(ptrtoint Y)) to
+  // (inttoptr (sub (ptrtoint X), (ptrtoint Y))).  The GEP pattern is emitted
+  // by the SCEV expander for certain kinds of pointer arithmetic.
+  if (DL && GEP.getNumIndices() == 1 &&
+      match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+    unsigned AS = GEP.getPointerAddressSpace();
+    if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
+        GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+        DL->getPointerSizeInBits(AS)) {
+      Operator *Index = cast<Operator>(GEP.getOperand(1));
+      Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+      Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+      return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+    }
+  }
+
    // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
    Value *StrippedPtr = PtrOp->stripPointerCasts();
    PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
@@ -1163,9 +1220,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    if (!StrippedPtrTy)
      return 0;
  
-  if (StrippedPtr != PtrOp &&
-    StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
-
+  if (StrippedPtr != PtrOp) {
      bool HasZeroPointerIndex = false;
      if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
        HasZeroPointerIndex = C->isZero();
@@ -1210,29 +1265,31 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
        // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
        Type *SrcElTy = StrippedPtrTy->getElementType();
-      Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
-      if (TD && SrcElTy->isArrayTy() &&
-          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
-          TD->getTypeAllocSize(ResElTy)) {
-        Value *Idx[2];
-        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
-        Idx[1] = GEP.getOperand(1);
+      Type *ResElTy = PtrOp->getType()->getPointerElementType();
+      if (DL && SrcElTy->isArrayTy() &&
+          DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+          DL->getTypeAllocSize(ResElTy)) {
+        Type *IdxType = DL->getIntPtrType(GEP.getType());
+        Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
          Value *NewGEP = GEP.isInBounds() ?
            Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
            Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+
          // V and GEP are both pointer types --> BitCast
-        return new BitCastInst(NewGEP, GEP.getType());
+        if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+          return new BitCastInst(NewGEP, GEP.getType());
+        return new AddrSpaceCastInst(NewGEP, GEP.getType());
        }
  
        // Transform things like:
        // %V = mul i64 %N, 4
        // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
        // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
-      if (TD && ResElTy->isSized() && SrcElTy->isSized()) {
+      if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
          // Check that changing the type amounts to dividing the index by a scale
          // factor.
-        uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
-        uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy);
+        uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
+        uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
          if (ResSize && SrcSize % ResSize == 0) {
            Value *Idx = GEP.getOperand(1);
            unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1240,7 +1297,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  
            // Earlier transforms ensure that the index has type IntPtrType, which
            // considerably simplifies the logic by eliminating implicit casts.
-          assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+          assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
                   "Index not cast to pointer width?");
  
            bool NSW;
@@ -1251,8 +1308,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              Value *NewGEP = GEP.isInBounds() && NSW ?
                Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
                Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+
              // The NewGEP must be pointer typed, so must the old one -> BitCast
-            return new BitCastInst(NewGEP, GEP.getType());
+            if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+              return new BitCastInst(NewGEP, GEP.getType());
+            return new AddrSpaceCastInst(NewGEP, GEP.getType());
            }
          }
        }
@@ -1261,13 +1321,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
        //   (where tmp = 8*tmp2) into:
        // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-      if (TD && ResElTy->isSized() && SrcElTy->isSized() &&
+      if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
            SrcElTy->isArrayTy()) {
          // Check that changing to the array element type amounts to dividing the
          // index by a scale factor.
-        uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
-        uint64_t ArrayEltSize =
-          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+        uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
+        uint64_t ArrayEltSize
+          = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
          if (ResSize && ArrayEltSize % ResSize == 0) {
            Value *Idx = GEP.getOperand(1);
            unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1275,7 +1335,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  
            // Earlier transforms ensure that the index has type IntPtrType, which
            // considerably simplifies the logic by eliminating implicit casts.
-          assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+          assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
                   "Index not cast to pointer width?");
  
            bool NSW;
@@ -1283,41 +1343,47 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
              // If the multiplication NewIdx * Scale may overflow then the new
              // GEP may not be "inbounds".
-            Value *Off[2];
-            Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
-            Off[1] = NewIdx;
+            Value *Off[2] = {
+              Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
+              NewIdx
+            };
+
              Value *NewGEP = GEP.isInBounds() && NSW ?
                Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
                Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
              // The NewGEP must be pointer typed, so must the old one -> BitCast
-            return new BitCastInst(NewGEP, GEP.getType());
+            if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+              return new BitCastInst(NewGEP, GEP.getType());
+            return new AddrSpaceCastInst(NewGEP, GEP.getType());
            }
          }
        }
      }
    }
  
+  if (!DL)
+    return 0;
+
    /// See if we can simplify:
    ///   X = bitcast A* to B*
    ///   Y = gep X, <...constant indices...>
    /// into a gep of the original struct.  This is important for SROA and alias
    /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
-    if (TD &&
-        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+    Value *Operand = BCI->getOperand(0);
+    PointerType *OpType = cast<PointerType>(Operand->getType());
+    unsigned OffsetBits = DL->getPointerTypeSizeInBits(OpType);
+    APInt Offset(OffsetBits, 0);
+    if (!isa<BitCastInst>(Operand) &&
+        GEP.accumulateConstantOffset(*DL, Offset) &&
          StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
  
-      // Determine how much the GEP moves the pointer.
-      SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end());
-      int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops);
-
        // If this GEP instruction doesn't move the pointer, just replace the GEP
        // with a bitcast of the real input to the dest type.
-      if (Offset == 0) {
+      if (!Offset) {
          // If the bitcast is of an allocation, and the allocation will be
          // converted to match the type of the cast, don't touch this.
-        if (isa<AllocaInst>(BCI->getOperand(0)) ||
-            isAllocationFn(BCI->getOperand(0), TLI)) {
+        if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
            // See if the bitcast simplifies, if so, don't nuke this GEP yet.
            if (Instruction *I = visitBitCast(*BCI)) {
              if (I != BCI) {
@@ -1328,19 +1394,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              return &GEP;
            }
          }
-        return new BitCastInst(BCI->getOperand(0), GEP.getType());
+        return new BitCastInst(Operand, GEP.getType());
        }
  
        // Otherwise, if the offset is non-zero, we need to find out if there is a
        // field at Offset in 'A's type.  If so, we can pull the cast through the
        // GEP.
        SmallVector<Value*, 8> NewIndices;
-      Type *InTy =
-        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
-      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+      if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
          Value *NGEP = GEP.isInBounds() ?
-          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
-          Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+          Builder->CreateInBoundsGEP(Operand, NewIndices) :
+          Builder->CreateGEP(Operand, NewIndices);
  
          if (NGEP->getType() == GEP.getType())
            return ReplaceInstUsesWith(GEP, NGEP);
@@ -1353,8 +1417,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    return 0;
  }
  
-
-
  static bool
  isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
                       const TargetLibraryInfo *TLI) {
@@ -1464,13 +1526,69 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
        Module *M = II->getParent()->getParent()->getParent();
        Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
        InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
-                         ArrayRef<Value *>(), "", II->getParent());
+                         None, "", II->getParent());
      }
      return EraseInstFromFunction(MI);
    }
    return 0;
  }
  
+/// \brief Move the call to free before a NULL test.
+///
+/// Check if this call to free is reached after its argument has been tested
+/// against NULL (property 0). If yes, it is legal to move this call into its
+/// predecessor block.
+///
+/// The move is performed only if the block containing the call to free
+/// will be removed, i.e.:
+/// 1. it has only one predecessor P, and P has two successors
+/// 2. it contains the call and an unconditional branch
+/// 3. its successor is the same as its predecessor's successor
+///
+/// Profitability is not evaluated here: this function should be called only
+/// if the caller knows the transformation would be profitable (e.g., for code
+/// size). Returns &FI after moving it, or null if a constraint is not met.
+static Instruction *
+tryToMoveFreeBeforeNullTest(CallInst &FI) {
+  Value *Op = FI.getArgOperand(0);
+  BasicBlock *FreeInstrBB = FI.getParent();
+  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
+
+  // Validate part of constraint #1: Only one predecessor
+  // FIXME: We can extend the number of predecessors, but in that case, we
+  //        would duplicate the call to free in each predecessor and it may
+  //        not be profitable even for code size.
+  if (!PredBB)
+    return 0;
+
+  // Validate constraint #2: Does this block contain only the call to
+  //                         free and an unconditional branch?
+  // FIXME: We could check if we can speculate everything in the
+  //        predecessor block
+  if (FreeInstrBB->size() != 2)
+    return 0;
+  BasicBlock *SuccBB;
+  if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+    return 0;
+
+  // Validate the rest of constraint #1 by matching on the pred branch.
+  TerminatorInst *TI = PredBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+    return 0;
+  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+    return 0;
+
+  // Validate constraint #3: Ensure the null case just falls through.
+  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
+    return 0;
+  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
+         "Broken CFG: missing edge from predecessor to successor");
+
+  FI.moveBefore(TI);
+  return &FI;
+}
  
  
  Instruction *InstCombiner::visitFree(CallInst &FI) {
@@ -1489,6 +1607,16 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
    if (isa<ConstantPointerNull>(Op))
      return EraseInstFromFunction(FI);
  
+  // If we optimize for code size, try to move the call to free before the null
+  // test so that simplify cfg can remove the empty block and dead code
+  // elimination can remove the branch. I.e., this helps turn something like:
+  // if (foo) free(foo);
+  // into
+  // free(foo);
+  if (MinimizeSize)
+    if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+      return I;
+
    return 0;
  }
  
@@ -1507,7 +1635,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
      return &BI;
    }
  
-  // Cannonicalize fcmp_one -> fcmp_oeq
+  // Canonicalize fcmp_one -> fcmp_oeq
    FCmpInst::Predicate FPred; Value *Y;
    if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
                               TrueDest, FalseDest)) &&
@@ -1523,7 +1651,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
        return &BI;
      }
  
-  // Cannonicalize icmp_ne -> icmp_eq
+  // Canonicalize icmp_ne -> icmp_eq
    ICmpInst::Predicate IPred;
    if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
                        TrueDest, FalseDest)) &&
@@ -1957,7 +2085,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
          continue;
        // If Filter is a subset of LFilter, i.e. every element of Filter is also
        // an element of LFilter, then discard LFilter.
-      SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j;
+      SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j;
        // If Filter is empty then it is a subset of LFilter.
        if (!FElts) {
          // Discard LFilter.
@@ -2103,7 +2231,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  static bool AddReachableCodeToWorklist(BasicBlock *BB,
                                         SmallPtrSet<BasicBlock*, 64> &Visited,
                                         InstCombiner &IC,
-                                       const DataLayout *TD,
+                                       const DataLayout *DL,
                                         const TargetLibraryInfo *TLI) {
    bool MadeIRChange = false;
    SmallVector<BasicBlock*, 256> Worklist;
@@ -2124,15 +2252,15 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
        // DCE instruction if trivially dead.
        if (isInstructionTriviallyDead(Inst, TLI)) {
          ++NumDeadInst;
-        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+        DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
          Inst->eraseFromParent();
          continue;
        }
  
        // ConstantProp instruction if trivially constant.
        if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
-        if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
-          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+        if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
+          DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
                         << *Inst << '\n');
            Inst->replaceAllUsesWith(C);
            ++NumConstProp;
@@ -2140,7 +2268,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
            continue;
          }
  
-      if (TD) {
+      if (DL) {
          // See if we can constant fold its operands.
          for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
               i != e; ++i) {
@@ -2149,7 +2277,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
  
            Constant*& FoldRes = FoldedConstants[CE];
            if (!FoldRes)
-            FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
+            FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
            if (!FoldRes)
              FoldRes = CE;
  
@@ -2208,7 +2336,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
  bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
    MadeIRChange = false;
  
-  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+  DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                 << F.getName() << "\n");
  
    {
@@ -2216,7 +2344,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
      // the reachable instructions.  Ignore blocks that are not reachable.  Keep
      // track of which blocks we visit.
      SmallPtrSet<BasicBlock*, 64> Visited;
-    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL,
                                                 TLI);
  
      // Do a quick scan over the function.  If we find any blocks that are
@@ -2253,7 +2381,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
  
      // Check to see if we can DCE the instruction.
      if (isInstructionTriviallyDead(I, TLI)) {
-      DEBUG(errs() << "IC: DCE: " << *I << '\n');
+      DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
        EraseInstFromFunction(*I);
        ++NumDeadInst;
        MadeIRChange = true;
@@ -2262,8 +2390,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
  
      // Instruction isn't dead, see if we can constant propagate it.
      if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
-      if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
-        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+      if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
+        DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
  
          // Add operands to the worklist.
          ReplaceInstUsesWith(*I, C);
@@ -2311,13 +2439,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
      std::string OrigI;
  #endif
      DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
-    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+    DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
  
      if (Instruction *Result = visit(*I)) {
        ++NumCombined;
        // Should we replace the old instruction with a new one?
        if (Result != I) {
-        DEBUG(errs() << "IC: Old = " << *I << '\n'
+        DEBUG(dbgs() << "IC: Old = " << *I << '\n'
                       << "    New = " << *Result << '\n');
  
          if (!I->getDebugLoc().isUnknown())
@@ -2346,7 +2474,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
          EraseInstFromFunction(*I);
        } else {
  #ifndef NDEBUG
-        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+        DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
                       << "    New = " << *I << '\n');
  #endif
  
@@ -2367,19 +2495,44 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
    return MadeIRChange;
  }
  
+namespace {
+class InstCombinerLibCallSimplifier : public LibCallSimplifier {
+  InstCombiner *IC; // Combiner that use replacements are forwarded to.
+public:
+  InstCombinerLibCallSimplifier(const DataLayout *DL,
+                                const TargetLibraryInfo *TLI,
+                                InstCombiner *IC)
+    : LibCallSimplifier(DL, TLI, UnsafeFPShrink) {
+    this->IC = IC;
+  }
+
+  /// replaceAllUsesWith - Override that routes use replacement through the
+  /// InstCombiner given at construction (see ReplaceInstUsesWith).
+  virtual void replaceAllUsesWith(Instruction *I, Value *With) const {
+    IC->ReplaceInstUsesWith(*I, With);
+  }
+};
+} // end anonymous namespace
  
  bool InstCombiner::runOnFunction(Function &F) {
-  TD = getAnalysisIfAvailable<DataLayout>();
+  if (skipOptnoneFunction(F))
+    return false;
+
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
    TLI = &getAnalysis<TargetLibraryInfo>();
+  // Minimizing size?
+  MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                                Attribute::MinSize);
  
    /// Builder - This is an IRBuilder that automatically inserts new
    /// instructions into the worklist when they are created.
    IRBuilder<true, TargetFolder, InstCombineIRInserter>
-    TheBuilder(F.getContext(), TargetFolder(TD),
+    TheBuilder(F.getContext(), TargetFolder(DL),
                 InstCombineIRInserter(Worklist));
    Builder = &TheBuilder;
  
-  LibCallSimplifier TheSimplifier(TD, TLI);
+  InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
    Simplifier = &TheSimplifier;
  
    bool EverMadeChange = false;