Remove FreeInst.

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 4b66015380a215848ec123ca4d3e9ecdfa8cfba0..fa84bb5c493d4a9b8394708fdd26cde9d7680f88 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
  #include "llvm/GlobalVariable.h"
  #include "llvm/Operator.h"
  #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -55,7 +56,7 @@
  #include "llvm/Support/IRBuilder.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetFolder.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/SmallVector.h"
@@ -90,8 +91,10 @@ namespace {
      /// Add - Add the specified instruction to the worklist if it isn't already
      /// in it.
      void Add(Instruction *I) {
-      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
+      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+        DEBUG(errs() << "IC: ADD: " << *I << '\n');
          Worklist.push_back(I);
+      }
      }
      
      void AddValue(Value *V) {
@@ -99,6 +102,20 @@ namespace {
          Add(I);
      }
      
+    /// AddInitialGroup - Add the specified batch of stuff in reverse order.
+    /// which should only be done when the worklist is empty and when the group
+    /// has no duplicates.
+    void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+      assert(Worklist.empty() && "Worklist must be empty to add initial group");
+      Worklist.reserve(NumEntries+16);
+      DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+      for (; NumEntries; --NumEntries) {
+        Instruction *I = List[NumEntries-1];
+        WorklistMap.insert(std::make_pair(I, Worklist.size()));
+        Worklist.push_back(I);
+      }
+    }
+    
      // Remove - remove I from the worklist if it exists.
      void Remove(Instruction *I) {
        DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
@@ -159,18 +176,18 @@ namespace {
  
  
  namespace {
-  class VISIBILITY_HIDDEN InstCombiner
-    : public FunctionPass,
-      public InstVisitor<InstCombiner, Instruction*> {
+  class InstCombiner : public FunctionPass,
+                       public InstVisitor<InstCombiner, Instruction*> {
      TargetData *TD;
      bool MustPreserveLCSSA;
+    bool MadeIRChange;
    public:
      /// Worklist - All of the instructions that need to be simplified.
      InstCombineWorklist Worklist;
  
      /// Builder - This is an IRBuilder that automatically inserts new
      /// instructions into the worklist when they are created.
-    typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy;
+    typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
      BuilderTy *Builder;
          
      static char ID; // Pass identification, replacement for typeid
@@ -267,8 +284,8 @@ namespace {
      Instruction *visitInvokeInst(InvokeInst &II);
      Instruction *visitPHINode(PHINode &PN);
      Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
-    Instruction *visitAllocationInst(AllocationInst &AI);
-    Instruction *visitFreeInst(FreeInst &FI);
+    Instruction *visitAllocaInst(AllocaInst &AI);
+    Instruction *visitFree(Instruction &FI);
      Instruction *visitLoadInst(LoadInst &LI);
      Instruction *visitStoreInst(StoreInst &SI);
      Instruction *visitBranchInst(BranchInst &BI);
@@ -303,27 +320,7 @@ namespace {
        Worklist.Add(New);
        return New;
      }
-
-    /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
-    /// This also adds the cast to the worklist.  Finally, this returns the
-    /// cast.
-    Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
-                            Instruction &Pos) {
-      if (V->getType() == Ty) return V;
-
-      if (Constant *CV = dyn_cast<Constant>(V))
-        return ConstantExpr::getCast(opc, CV, Ty);
-      
-      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
-      Worklist.Add(C);
-      return C;
-    }
          
-    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
-      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
-    }
-
-
      // ReplaceInstUsesWith - This method is to be used when an instruction is
      // found to be dead, replacable with another preexisting expression.  Here
      // we add all uses of I to the worklist, replace all uses of I with the new
@@ -347,6 +344,8 @@ namespace {
      // instruction.  Instead, visit methods should return the value returned by
      // this function.
      Instruction *EraseInstFromFunction(Instruction &I) {
+      DEBUG(errs() << "IC: ERASE " << I << '\n');
+
        assert(I.use_empty() && "Cannot erase instruction that is used!");
        // Make sure that we reprocess all operands now that we reduced their
        // use counts.
@@ -357,6 +356,7 @@ namespace {
        }
        Worklist.Remove(&I);
        I.eraseFromParent();
+      MadeIRChange = true;
        return 0;  // Don't do anything with FI
      }
          
@@ -400,10 +400,15 @@ namespace {
      Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                        APInt& UndefElts, unsigned Depth = 0);
        
-    // FoldOpIntoPhi - Given a binary operator or cast instruction which has a
-    // PHI node as operand #0, see if we can fold the instruction into the PHI
-    // (which is only possible if all operands to the PHI are constants).
-    Instruction *FoldOpIntoPhi(Instruction &I);
+    // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+    // which has a PHI node as operand #0, see if we can fold the instruction
+    // into the PHI (which is only possible if all operands to the PHI are
+    // constants).
+    //
+    // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+    // that would normally be unprofitable because they strongly encourage jump
+    // threading.
+    Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
  
      // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
      // operator and they all are only used by the PHI, PHI together their
@@ -420,7 +425,7 @@ namespace {
                                bool isSub, Instruction &I);
      Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                   bool isSigned, bool Inside, Instruction &IB);
-    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
      Instruction *MatchBSwap(BinaryOperator &I);
      bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
      Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
@@ -625,9 +630,32 @@ static inline Value *dyn_castFNegVal(Value *V) {
    return 0;
  }
  
-static inline Value *dyn_castNotVal(Value *V) {
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to).  This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+  // ~(~(X)) -> X.
    if (BinaryOperator::isNot(V))
-    return BinaryOperator::getNotArgument(V);
+    return true;
+  
+  // Constants can be considered to be not'ed values.
+  if (isa<ConstantInt>(V))
+    return true;
+  
+  // Compares can be inverted if they have a single use.
+  if (CmpInst *CI = dyn_cast<CmpInst>(V))
+    return CI->hasOneUse();
+  
+  return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+  // If this is not(not(x)) don't return that this is a not: we want the two
+  // not's to be folded first.
+  if (BinaryOperator::isNot(V)) {
+    Value *Operand = BinaryOperator::getNotArgument(V);
+    if (!isFreeToInvert(Operand))
+      return Operand;
+  }
  
    // Constants can be considered to be not'ed values...
    if (ConstantInt *C = dyn_cast<ConstantInt>(V))
@@ -635,6 +663,8 @@ static inline Value *dyn_castNotVal(Value *V) {
    return 0;
  }
  
+
+
  // dyn_castFoldableMul - If this value is a multiply that can be folded into
  // other computations (because it has a constant operand), return the
  // non-constant operand of the multiply, and set CST to point to the multiplier.
@@ -785,7 +815,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
    Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
                                            KnownZero, KnownOne, Depth);
    if (NewVal == 0) return false;
-  U.set(NewVal);
+  U = NewVal;
    return true;
  }
  
@@ -1030,8 +1060,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
      // If all of the demanded bits are known to be zero on one side or the
      // other, turn this into an *inclusive* or.
      //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
-    if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0)
-      return Builder->CreateOr(I->getOperand(0), I->getOperand(1),I->getName());
+    if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+      Instruction *Or = 
+        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+                                 I->getName());
+      return InsertNewInstBefore(Or, *I);
+    }
      
      // If all of the demanded bits on one side are known, and all of the set
      // bits on that side are also known to be set on the other side, turn this
@@ -1053,6 +1087,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
      
+    // If our LHS is an 'and' and if it has one use, and if any of the bits we
+    // are flipping are known to be set, then the xor is just resetting those
+    // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
+    // simplifying both of them.
+    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+          isa<ConstantInt>(I->getOperand(1)) &&
+          isa<ConstantInt>(LHSInst->getOperand(1)) &&
+          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+        
+        Constant *AndC =
+          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+        Instruction *NewAnd = 
+          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+        InsertNewInstBefore(NewAnd, *I);
+        
+        Constant *XorC =
+          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+        Instruction *NewXor =
+          BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+        return InsertNewInstBefore(NewXor, *I);
+      }
+          
+          
      RHSKnownZero = KnownZeroOut;
      RHSKnownOne  = KnownOneOut;
      break;
@@ -1896,9 +1957,8 @@ struct AddMaskingAnd {
  
  static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                               InstCombiner *IC) {
-  if (CastInst *CI = dyn_cast<CastInst>(&I)) {
-    return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I);
-  }
+  if (CastInst *CI = dyn_cast<CastInst>(&I))
+    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
  
    // Figure out if the constant is the left or the right argument.
    bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -1951,20 +2011,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
  }
  
  
-/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI
-/// node as operand #0, see if we can fold the instruction into the PHI (which
-/// is only possible if all operands to the PHI are constants).
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+                                         bool AllowAggressive) {
+  AllowAggressive = false;
    PHINode *PN = cast<PHINode>(I.getOperand(0));
    unsigned NumPHIValues = PN->getNumIncomingValues();
-  if (!PN->hasOneUse() || NumPHIValues == 0) return 0;
-
-  // Check to see if all of the operands of the PHI are constants.  If there is
-  // one non-constant value, remember the BB it is.  If there is more than one
-  // or if *it* is a PHI, bail out.
+  if (NumPHIValues == 0 ||
+      // We normally only transform phis with a single use, unless we're trying
+      // hard to make jump threading happen.
+      (!PN->hasOneUse() && !AllowAggressive))
+    return 0;
+  
+  
+  // Check to see if all of the operands of the PHI are simple constants
+  // (constantint/constantfp/undef).  If there is one non-constant value,
+  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
+  // bail out.  We don't do arbitrary constant expressions here because moving
+  // their computation can be expensive without a cost model.
    BasicBlock *NonConstBB = 0;
    for (unsigned i = 0; i != NumPHIValues; ++i)
-    if (!isa<Constant>(PN->getIncomingValue(i))) {
+    if (!isa<Constant>(PN->getIncomingValue(i)) ||
+        isa<ConstantExpr>(PN->getIncomingValue(i))) {
        if (NonConstBB) return 0;  // More than one non-const value.
        if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
        NonConstBB = PN->getIncomingBlock(i);
@@ -1979,7 +2053,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    // operation in that block.  However, if this is a critical edge, we would be
    // inserting the computation one some other paths (e.g. inside a loop).  Only
    // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB) {
+  if (NonConstBB != 0 && !AllowAggressive) {
      BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
      if (!BI || !BI->isUnconditional()) return 0;
    }
@@ -1991,7 +2065,29 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    NewPN->takeName(PN);
  
    // Next, add all of the operands to the PHI.
-  if (I.getNumOperands() == 2) {
+  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+    // We only currently try to fold the condition of a select when it is a phi,
+    // not the true/false values.
+    Value *TrueV = SI->getTrueValue();
+    Value *FalseV = SI->getFalseValue();
+    BasicBlock *PhiTransBB = PN->getParent();
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      BasicBlock *ThisBB = PN->getIncomingBlock(i);
+      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *InV = 0;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+      } else {
+        assert(PN->getIncomingBlock(i) == NonConstBB);
+        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+                                 FalseVInPred,
+                                 "phitmp", NonConstBB->getTerminator());
+        Worklist.Add(cast<Instruction>(InV));
+      }
+      NewPN->addIncoming(InV, ThisBB);
+    }
+  } else if (I.getNumOperands() == 2) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
        Value *InV = 0;
@@ -2323,8 +2419,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
            ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
            WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
          // Insert the new, smaller add.
-        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
-                                           CI, "addconv");
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+                                              CI, "addconv");
          return new SExtInst(NewAdd, I.getType());
        }
      }
@@ -2339,8 +2435,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
            WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                     RHSConv->getOperand(0))) {
          // Insert the new integer add.
-        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
-                                           RHSConv->getOperand(0), "addconv");
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+                                              RHSConv->getOperand(0), "addconv");
          return new SExtInst(NewAdd, I.getType());
        }
      }
@@ -2396,8 +2492,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
            ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
            WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
          // Insert the new integer add.
-        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
-                                           CI, "addconv");
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+                                              CI, "addconv");
          return new SIToFPInst(NewAdd, I.getType());
        }
      }
@@ -2412,8 +2508,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
            WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                     RHSConv->getOperand(0))) {
          // Insert the new integer add.
-        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
-                                           RHSConv->getOperand(0), "addconv");
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+                                              RHSConv->getOperand(0), "addconv");
          return new SIToFPInst(NewAdd, I.getType());
        }
      }
@@ -2627,14 +2723,14 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
  
  Instruction *InstCombiner::visitMul(BinaryOperator &I) {
    bool Changed = SimplifyCommutative(I);
-  Value *Op0 = I.getOperand(0);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
-  if (isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0
+  if (isa<UndefValue>(Op1))              // undef * X -> 0
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
  
-  // Simplify mul instructions with a constant RHS...
-  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+  // Simplify mul instructions with a constant RHS.
+  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
  
        // ((X << C1)*C2) == (X * (C2 << C1))
        if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
@@ -2644,7 +2740,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
                                          ConstantExpr::getShl(CI, ShOp));
  
        if (CI->isZero())
-        return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
+        return ReplaceInstUsesWith(I, Op1C);  // X * 0  == 0
        if (CI->equalsInt(1))                  // X * 1  == X
          return ReplaceInstUsesWith(I, Op0);
        if (CI->isAllOnesValue())              // X * -1 == 0 - X
@@ -2655,11 +2751,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
          return BinaryOperator::CreateShl(Op0,
                   ConstantInt::get(Op0->getType(), Val.logBase2()));
        }
-    } else if (isa<VectorType>(Op1->getType())) {
-      if (Op1->isNullValue())
-        return ReplaceInstUsesWith(I, Op1);
+    } else if (isa<VectorType>(Op1C->getType())) {
+      if (Op1C->isNullValue())
+        return ReplaceInstUsesWith(I, Op1C);
  
-      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
          if (Op1V->isAllOnesValue())              // X * -1 == 0 - X
            return BinaryOperator::CreateNeg(Op0, I.getName());
  
@@ -2674,10 +2770,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
      
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
        if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
-          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
+          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
          // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
-        Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1, "tmp");
-        Value *C1C2 = Builder->CreateMul(Op1, Op0I->getOperand(1));
+        Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
+        Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
          return BinaryOperator::CreateAdd(Add, C1C2);
          
        }
@@ -2693,23 +2789,23 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
    }
  
    if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
+    if (Value *Op1v = dyn_castNegVal(Op1))
        return BinaryOperator::CreateMul(Op0v, Op1v);
  
    // (X / Y) *  Y = X - (X % Y)
    // (X / Y) * -Y = (X % Y) - X
    {
-    Value *Op1 = I.getOperand(1);
+    Value *Op1C = Op1;
      BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
      if (!BO ||
          (BO->getOpcode() != Instruction::UDiv && 
           BO->getOpcode() != Instruction::SDiv)) {
-      Op1 = Op0;
-      BO = dyn_cast<BinaryOperator>(I.getOperand(1));
+      Op1C = Op0;
+      BO = dyn_cast<BinaryOperator>(Op1);
      }
-    Value *Neg = dyn_castNegVal(Op1);
+    Value *Neg = dyn_castNegVal(Op1C);
      if (BO && BO->hasOneUse() &&
-        (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) &&
+        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
          (BO->getOpcode() == Instruction::UDiv ||
           BO->getOpcode() == Instruction::SDiv)) {
        Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
@@ -2717,10 +2813,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
        // If the division is exact, X % Y is zero.
        if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
          if (SDiv->isExact()) {
-          if (Op1BO == Op1)
+          if (Op1BO == Op1C)
              return ReplaceInstUsesWith(I, Op0BO);
-          else
-            return BinaryOperator::CreateNeg(Op0BO);
+          return BinaryOperator::CreateNeg(Op0BO);
          }
  
        Value *Rem;
@@ -2730,58 +2825,43 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
          Rem = Builder->CreateSRem(Op0BO, Op1BO);
        Rem->takeName(BO);
  
-      if (Op1BO == Op1)
+      if (Op1BO == Op1C)
          return BinaryOperator::CreateSub(Op0BO, Rem);
        return BinaryOperator::CreateSub(Rem, Op0BO);
      }
    }
  
+  /// i1 mul -> i1 and.
    if (I.getType() == Type::getInt1Ty(*Context))
-    return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
+    return BinaryOperator::CreateAnd(Op0, Op1);
  
+  // X*(1 << Y) --> X << Y
+  // (1 << Y)*X --> X << Y
+  {
+    Value *Y;
+    if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+      return BinaryOperator::CreateShl(Op1, Y);
+    if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+      return BinaryOperator::CreateShl(Op0, Y);
+  }
+  
    // If one of the operands of the multiply is a cast from a boolean value, then
    // we know the bool is either zero or one, so this is a 'masking' multiply.
-  // See if we can simplify things based on how the boolean was originally
-  // formed.
-  CastInst *BoolCast = 0;
-  if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
-    if (CI->getOperand(0)->getType() == Type::getInt1Ty(*Context))
-      BoolCast = CI;
-  if (!BoolCast)
-    if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1)))
-      if (CI->getOperand(0)->getType() == Type::getInt1Ty(*Context))
-        BoolCast = CI;
-  if (BoolCast) {
-    if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
-      Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
-      const Type *SCOpTy = SCIOp0->getType();
-      bool TIS = false;
-      
-      // If the icmp is true iff the sign bit of X is set, then convert this
-      // multiply into a shift/and combination.
-      if (isa<ConstantInt>(SCIOp1) &&
-          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
-          TIS) {
-        // Shift the X value right to turn it into "all signbits".
-        Constant *Amt = ConstantInt::get(SCIOp0->getType(),
-                                          SCOpTy->getPrimitiveSizeInBits()-1);
-        Value *V = Builder->CreateAShr(SCIOp0, Amt,
-                                    BoolCast->getOperand(0)->getName()+".mask");
-
-        // If the multiply type is not the same as the source type, sign extend
-        // or truncate to the multiply type.
-        if (I.getType() != V->getType()) {
-          uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits();
-          uint32_t DstBits = I.getType()->getPrimitiveSizeInBits();
-          Instruction::CastOps opcode = 
-            (SrcBits == DstBits ? Instruction::BitCast : 
-             (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc));
-          V = InsertCastBefore(opcode, V, I.getType(), I);
-        }
+  //   X * Y (where Y is 0 or 1) -> X & (0-Y)
+  if (!isa<VectorType>(I.getType())) {
+    // -2 is "-1 << 1" so it is all bits set except the low one.
+    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+    
+    Value *BoolCast = 0, *OtherOp = 0;
+    if (MaskedValueIsZero(Op0, Negative2))
+      BoolCast = Op0, OtherOp = Op1;
+    else if (MaskedValueIsZero(Op1, Negative2))
+      BoolCast = Op1, OtherOp = Op0;
  
-        Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
-        return BinaryOperator::CreateAnd(V, OtherOp);
-      }
+    if (BoolCast) {
+      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+                                    BoolCast, "tmp");
+      return BinaryOperator::CreateAnd(V, OtherOp);
      }
    }
  
@@ -2790,17 +2870,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
  
  Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
    bool Changed = SimplifyCommutative(I);
-  Value *Op0 = I.getOperand(0);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
    // Simplify mul instructions with a constant RHS...
-  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
-    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
        // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
        // ANSI says we can drop signals, so we can do this anyway." (from GCC)
        if (Op1F->isExactlyValue(1.0))
          return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
-    } else if (isa<VectorType>(Op1->getType())) {
-      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+    } else if (isa<VectorType>(Op1C->getType())) {
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
          // As above, vector X*splat(1.0) -> X in all defined cases.
          if (Constant *Splat = Op1V->getSplatValue()) {
            if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
@@ -2821,7 +2901,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
    }
  
    if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1)))
+    if (Value *Op1v = dyn_castFNegVal(Op1))
        return BinaryOperator::CreateFMul(Op0v, Op1v);
  
    return Changed ? &I : 0;
@@ -3455,9 +3535,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
  /// PredicatesFoldable - Return true if both predicates match sign or if at
  /// least one of them is an equality comparison (which is signless).
  static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
-  return (ICmpInst::isSignedPredicate(p1) == ICmpInst::isSignedPredicate(p2)) ||
-         (ICmpInst::isSignedPredicate(p1) && ICmpInst::isEquality(p2)) ||
-         (ICmpInst::isSignedPredicate(p2) && ICmpInst::isEquality(p1));
+  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
  }
  
  namespace { 
@@ -3494,9 +3574,7 @@ struct FoldICmpLogical {
      default: llvm_unreachable("Illegal logical opcode!"); return 0;
      }
  
-    bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || 
-                    ICmpInst::isSignedPredicate(ICI->getPredicate());
-      
+    bool isSigned = RHSICI->isSigned() || ICI->isSigned();
      Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext());
      if (Instruction *I = dyn_cast<Instruction>(RV))
        return I;
@@ -3793,9 +3871,9 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
      
    // Ensure that the larger constant is on the RHS.
    bool ShouldSwap;
-  if (ICmpInst::isSignedPredicate(LHSCC) ||
+  if (CmpInst::isSigned(LHSCC) ||
        (ICmpInst::isEquality(LHSCC) && 
-       ICmpInst::isSignedPredicate(RHSCC)))
+       CmpInst::isSigned(RHSCC)))
      ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
    else
      ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
@@ -4028,34 +4106,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
    }
  
    if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
-    const APInt& AndRHSMask = AndRHS->getValue();
+    const APInt &AndRHSMask = AndRHS->getValue();
      APInt NotAndRHS(~AndRHSMask);
  
      // Optimize a variety of ((val OP C1) & C2) combinations...
-    if (isa<BinaryOperator>(Op0)) {
-      Instruction *Op0I = cast<Instruction>(Op0);
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
        Value *Op0LHS = Op0I->getOperand(0);
        Value *Op0RHS = Op0I->getOperand(1);
        switch (Op0I->getOpcode()) {
+      default: break;
        case Instruction::Xor:
        case Instruction::Or:
          // If the mask is only needed on one incoming arm, push it up.
-        if (Op0I->hasOneUse()) {
-          if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
-            // Not masking anything out for the LHS, move to RHS.
-            Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
-                                               Op0RHS->getName()+".masked");
-            return BinaryOperator::Create(
-                       cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
-          }
-          if (!isa<Constant>(Op0RHS) &&
-              MaskedValueIsZero(Op0RHS, NotAndRHS)) {
-            // Not masking anything out for the RHS, move to LHS.
-            Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
-                                               Op0LHS->getName()+".masked");
-            return BinaryOperator::Create(
-                       cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
-          }
+        if (!Op0I->hasOneUse()) break;
+          
+        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+          // Not masking anything out for the LHS, move to RHS.
+          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+                                             Op0RHS->getName()+".masked");
+          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+        }
+        if (!isa<Constant>(Op0RHS) &&
+            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+          // Not masking anything out for the RHS, move to LHS.
+          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+                                             Op0LHS->getName()+".masked");
+          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
          }
  
          break;
@@ -4114,7 +4190,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
        if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
          if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
              CastOp->getNumOperands() == 2)
-          if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
+          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
              if (CastOp->getOpcode() == Instruction::And) {
                // Change: and (cast (and X, C1) to T), C2
                // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
@@ -4483,9 +4559,9 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
    
    // Ensure that the larger constant is on the RHS.
    bool ShouldSwap;
-  if (ICmpInst::isSignedPredicate(LHSCC) ||
+  if (CmpInst::isSigned(LHSCC) ||
        (ICmpInst::isEquality(LHSCC) && 
-       ICmpInst::isSignedPredicate(RHSCC)))
+       CmpInst::isSigned(RHSCC)))
      ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
    else
      ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
@@ -4908,14 +4984,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
      if (Ret) return Ret;
    }
  
-  if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
+  if ((A = dyn_castNotVal(Op0))) {   // ~A | Op1
      if (A == Op1)   // ~A | A == -1
        return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
    } else {
      A = 0;
    }
    // Note, A is still live here!
-  if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
+  if ((B = dyn_castNotVal(Op1))) {   // Op0 | ~B
      if (Op0 == B)
        return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
@@ -5012,12 +5088,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    // Is this a ~ operation?
    if (Value *NotOp = dyn_castNotVal(&I)) {
-    // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
-    // ~(~X | Y) === (X & ~Y) - De Morgan's Law
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
        if (Op0I->getOpcode() == Instruction::And || 
            Op0I->getOpcode() == Instruction::Or) {
-        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+        // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+        // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+        if (dyn_castNotVal(Op0I->getOperand(1)))
+          Op0I->swapOperands();
          if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
            Value *NotY =
              Builder->CreateNot(Op0I->getOperand(1),
@@ -5026,13 +5103,26 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
              return BinaryOperator::CreateOr(Op0NotVal, NotY);
            return BinaryOperator::CreateAnd(Op0NotVal, NotY);
          }
+        
+        // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+        // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+        if (isFreeToInvert(Op0I->getOperand(0)) && 
+            isFreeToInvert(Op0I->getOperand(1))) {
+          Value *NotX =
+            Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+          Value *NotY =
+            Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+          if (Op0I->getOpcode() == Instruction::And)
+            return BinaryOperator::CreateOr(NotX, NotY);
+          return BinaryOperator::CreateAnd(NotX, NotY);
+        }
        }
      }
    }
    
    
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    if (RHS == ConstantInt::getTrue(*Context) && Op0->hasOneUse()) {
+    if (RHS->isOne() && Op0->hasOneUse()) {
        // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
        if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
          return new ICmpInst(ICI->getInversePredicate(),
@@ -5798,9 +5888,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
  
    // Fold trivial predicates.
    if (I.getPredicate() == FCmpInst::FCMP_FALSE)
-    return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
    if (I.getPredicate() == FCmpInst::FCMP_TRUE)
-    return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
    
    // Simplify 'fcmp pred X, X'
    if (Op0 == Op1) {
@@ -5809,11 +5899,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
      case FCmpInst::FCMP_UEQ:    // True if unordered or equal
      case FCmpInst::FCMP_UGE:    // True if unordered, greater than, or equal
      case FCmpInst::FCMP_ULE:    // True if unordered, less than, or equal
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
      case FCmpInst::FCMP_OGT:    // True if ordered and greater than
      case FCmpInst::FCMP_OLT:    // True if ordered and less than
      case FCmpInst::FCMP_ONE:    // True if ordered and operands are unequal
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
        
      case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
      case FCmpInst::FCMP_ULT:    // True if unordered or less than
@@ -5836,7 +5926,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
    }
      
    if (isa<UndefValue>(Op1))                  // fcmp pred X, undef -> undef
-    return ReplaceInstUsesWith(I, UndefValue::get(Type::getInt1Ty(*Context)));
+    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
  
    // Handle fcmp with constant RHS
    if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
@@ -5859,7 +5949,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
          // block.  If in the same block, we're encouraging jump threading.  If
          // not, we are just pessimizing the code by making an i1 phi.
          if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I))
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
              return NV;
          break;
        case Instruction::SIToFP:
@@ -5904,17 +5994,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
  
    // icmp X, X
    if (Op0 == Op1)
-    return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), 
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
                                                     I.isTrueWhenEqual()));
  
    if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
-    return ReplaceInstUsesWith(I, UndefValue::get(Type::getInt1Ty(*Context)));
+    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
    
    // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
    // addresses never equal each other!  We already know that Op0 != Op1.
-  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || 
         isa<ConstantPointerNull>(Op0)) &&
-      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || 
         isa<ConstantPointerNull>(Op1)))
      return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), 
                                                     !I.isTrueWhenEqual()));
@@ -6034,7 +6124,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
      // EQ and NE we use unsigned values.
      APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
      APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
-    if (ICmpInst::isSignedPredicate(I.getPredicate())) {
+    if (I.isSigned()) {
        ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
                                               Op0Min, Op0Max);
        ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
@@ -6164,7 +6254,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
  
      // Turn a signed comparison into an unsigned one if both operands
      // are known to have the same sign.
-    if (I.isSignedPredicate() &&
+    if (I.isSigned() &&
          ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
           (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
        return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
@@ -6215,11 +6305,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          break;
  
        case Instruction::PHI:
-        // Only fold icmp into the PHI if the phi and fcmp are in the same
+        // Only fold icmp into the PHI if the phi and icmp are in the same
          // block.  If in the same block, we're encouraging jump threading.  If
          // not, we are just pessimizing the code by making an i1 phi.
          if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I))
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
              return NV;
          break;
        case Instruction::Select: {
@@ -6247,13 +6337,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
            return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
          break;
        }
-      case Instruction::Malloc:
+      case Instruction::Call:
          // If we have (malloc != null), and if the malloc has a single use, we
          // can assume it is successful and remove the malloc.
-        if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
-          Worklist.Add(LHSI);
-          return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
-                                                         !I.isTrueWhenEqual()));
+        if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+            isa<ConstantPointerNull>(RHSC)) {
+          // Need to explicitly erase malloc call here, instead of adding it to
+          // Worklist, because it won't get DCE'd from the Worklist since
+          // isInstructionTriviallyDead() returns false for function calls.
+          // It is OK to replace LHSI/MallocCall with Undef because the 
+          // instruction that uses it will be erased via Worklist.
+          if (extractMallocCall(LHSI)) {
+            LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
+            EraseInstFromFunction(*LHSI);
+            return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
+          }
+          if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
+            if (MallocCall->hasOneUse()) {
+              MallocCall->replaceAllUsesWith(
+                                        UndefValue::get(MallocCall->getType()));
+              EraseInstFromFunction(*MallocCall);
+              Worklist.Add(LHSI); // The malloc's bitcast use.
+              return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
+            }
          }
          break;
        }
@@ -6289,7 +6399,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
            Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
          } else {
            // Otherwise, cast the RHS right before the icmp
-          Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
+          Op1 = Builder->CreateBitCast(Op1, Op0->getType());
          }
        }
        return new ICmpInst(I.getPredicate(), Op0, Op1);
@@ -6324,7 +6434,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
            // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
            if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
              if (CI->getValue().isSignBit()) {
-              ICmpInst::Predicate Pred = I.isSignedPredicate()
+              ICmpInst::Predicate Pred = I.isSigned()
                                               ? I.getUnsignedPredicate()
                                               : I.getSignedPredicate();
                return new ICmpInst(Pred, Op0I->getOperand(0),
@@ -6332,7 +6442,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
              }
              
              if (CI->getValue().isMaxSignedValue()) {
-              ICmpInst::Predicate Pred = I.isSignedPredicate()
+              ICmpInst::Predicate Pred = I.isSigned()
                                               ? I.getUnsignedPredicate()
                                               : I.getSignedPredicate();
                Pred = I.getSwappedPredicate(Pred);
@@ -6469,7 +6579,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
    // work. :(  The if statement below tests that condition and bails 
    // if it finds it. 
    bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
-  if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
      return 0;
    if (DivRHS->isZero())
      return 0; // The ProdOV computation fails on divide by zero.
@@ -6668,7 +6778,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
          // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
          if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
            const APInt &SignBit = XorCST->getValue();
-          ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+          ICmpInst::Predicate Pred = ICI.isSigned()
                                           ? ICI.getUnsignedPredicate()
                                           : ICI.getSignedPredicate();
            return new ICmpInst(Pred, LHSI->getOperand(0),
@@ -6678,7 +6788,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
          // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
          if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
            const APInt &NotSignBit = XorCST->getValue();
-          ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+          ICmpInst::Predicate Pred = ICI.isSigned()
                                           ? ICI.getUnsignedPredicate()
                                           : ICI.getSignedPredicate();
            Pred = ICI.getSwappedPredicate(Pred);
@@ -6936,7 +7046,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
        ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
                              .subtract(LHSV);
  
-      if (ICI.isSignedPredicate()) {
+      if (ICI.isSigned()) {
          if (CR.getLower().isSignBit()) {
            return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
                                ConstantInt::get(*Context, CR.getUpper()));
@@ -7097,7 +7207,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
        RHSOp = RHSC->getOperand(0);
        // If the pointer types don't match, insert a bitcast.
        if (LHSCIOp->getType() != RHSOp->getType())
-        RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
+        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
      }
  
      if (RHSOp)
@@ -7111,7 +7221,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
      return 0;
  
    bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
-  bool isSignedCmp = ICI.isSignedPredicate();
+  bool isSignedCmp = ICI.isSigned();
  
    if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
      // Not an extension from the same type?
@@ -7672,7 +7782,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
  /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
  /// try to eliminate the cast by moving the type information into the alloc.
  Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
-                                                   AllocationInst &AI) {
+                                                   AllocaInst &AI) {
    const PointerType *PTy = cast<PointerType>(CI.getType());
    
    BuilderTy AllocaBuilder(*Builder);
@@ -7744,11 +7854,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
      Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
    }
    
-  AllocationInst *New;
-  if (isa<MallocInst>(AI))
-    New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
-  else
-    New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+  AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
    New->setAlignment(AI.getAlignment());
    New->takeName(&AI);
    
@@ -8107,11 +8213,11 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
            // If we were able to index down into an element, create the GEP
            // and bitcast the result.  This eliminates one bitcast, potentially
            // two.
-          Value *NGEP = Builder->CreateGEP(OrigBase, NewIndices.begin(),
-                                           NewIndices.end());
+          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+            Builder->CreateInBoundsGEP(OrigBase,
+                                       NewIndices.begin(), NewIndices.end()) :
+            Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
            NGEP->takeName(GEP);
-          if (isa<Instruction>(NGEP) && cast<GEPOperator>(GEP)->isInBounds())
-            cast<GEPOperator>(NGEP)->setIsInBounds(true);
            
            if (isa<BitCastInst>(CI))
              return new BitCastInst(NGEP, CI.getType());
@@ -8268,9 +8374,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
            return ReplaceInstUsesWith(CI, Res);
  
          // We need to emit a cast to truncate, then a cast to sext.
-        return CastInst::Create(Instruction::SExt,
-            InsertCastBefore(Instruction::Trunc, Res, Src->getType(), 
-                             CI), DestTy);
+        return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
        }
        }
      }
@@ -8290,8 +8394,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
        // Don't insert two casts unless at least one can be eliminated.
        if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
            !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
-        Value *Op0c = InsertCastBefore(Instruction::Trunc, Op0, DestTy, *SrcI);
-        Value *Op1c = InsertCastBefore(Instruction::Trunc, Op1, DestTy, *SrcI);
+        Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+        Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
          return BinaryOperator::Create(
              cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
        }
@@ -8302,7 +8406,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
          SrcI->getOpcode() == Instruction::Xor &&
          Op1 == ConstantInt::getTrue(*Context) &&
          (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
-      Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI);
+      Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
        return BinaryOperator::CreateXor(New,
                                        ConstantInt::get(CI.getType(), 1));
      }
@@ -8313,8 +8417,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
      ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
      if (CI && DestBitSize < SrcBitSize &&
          CI->getLimitedValue(DestBitSize) < DestBitSize) {
-      Value *Op0c = InsertCastBefore(Instruction::Trunc, Op0, DestTy, *SrcI);
-      Value *Op1c = InsertCastBefore(Instruction::Trunc, Op1, DestTy, *SrcI);
+      Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+      Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
        return BinaryOperator::CreateShl(Op0c, Op1c);
      }
      break;
@@ -8355,7 +8459,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
        
        // Okay, we can shrink this.  Truncate the input, then return a new
        // shift.
-      Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI);
+      Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
        Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
        return BinaryOperator::CreateLShr(V1, V2);
      }
@@ -8506,8 +8610,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
      if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
          (transformZExtICmp(LHS, CI, false) ||
           transformZExtICmp(RHS, CI, false))) {
-      Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
-      Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
        return BinaryOperator::Create(Instruction::Or, LCast, RCast);
      }
    }
@@ -8677,10 +8781,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
          // the cast, do this xform.
          if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
              RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
-          LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
-                                      CI.getType(), CI);
-          RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
-                                      CI.getType(), CI);
+          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
            return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
          }
        }
@@ -8809,9 +8911,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
        return 0;
      
-    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // If we are casting a alloca to a pointer to a type of the same
      // size, rewrite the allocation instruction to allocate the "right" type.
-    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+    // There is no need to modify malloc calls because it is their bitcast that
+    // needs to be cleaned up.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
        if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
          return V;
      
@@ -8830,21 +8934,17 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      // If we found a path from the src to dest, create the getelementptr now.
      if (SrcElTy == DstElTy) {
        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      Instruction *GEP = GetElementPtrInst::Create(Src,
-                                                   Idxs.begin(), Idxs.end(), "",
-                                                   ((Instruction*) NULL));
-      cast<GEPOperator>(GEP)->setIsInBounds(true);
-      return GEP;
+      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
+                                               ((Instruction*) NULL));
      }
    }
  
    if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
      if (DestVTy->getNumElements() == 1) {
        if (!isa<VectorType>(SrcTy)) {
-        Value *Elem = InsertCastBefore(Instruction::BitCast, Src,
-                                       DestVTy->getElementType(), CI);
+        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
          return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
-                                         Constant::getNullValue(Type::getInt32Ty(*Context)));
+                            Constant::getNullValue(Type::getInt32Ty(*Context)));
        }
        // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
      }
@@ -8878,10 +8978,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
               Tmp->getOperand(0)->getType() == DestTy) ||
              ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) && 
               Tmp->getOperand(0)->getType() == DestTy)) {
-          Value *LHS = InsertCastBefore(Instruction::BitCast,
-                                        SVI->getOperand(0), DestTy, CI);
-          Value *RHS = InsertCastBefore(Instruction::BitCast,
-                                        SVI->getOperand(1), DestTy, CI);
+          Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+          Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
            // Return a new shuffle vector.  Use the same element ID's, as we
            // know the vector types match #elts.
            return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
@@ -9213,6 +9311,44 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
    return Changed ? &SI : 0;
  }
  
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks).  See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI.  For
+/// example, cases like this cannot be mapped:
+///
+///   X = phi [ C1, BB1], [C2, BB2]
+///   Y = add
+///   Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+                                                   const SelectInst &SI) {
+  // If the value is a non-instruction value like a constant or argument, it
+  // can always be mapped.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return true;
+  
+  // If V is a PHI node defined in the same block as the condition PHI, we can
+  // map the arguments.
+  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+  
+  if (const PHINode *VP = dyn_cast<PHINode>(I))
+    if (VP->getParent() == CondPHI->getParent())
+      return true;
+  
+  // Otherwise, if the PHI and select are defined in the same block and if V is
+  // defined in a different block, then we can transform it.
+  if (SI.getParent() == CondPHI->getParent() &&
+      I->getParent() != CondPHI->getParent())
+    return true;
+  
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator based analysis, punt.
+  return false;
+}
+
  Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    Value *CondVal = SI.getCondition();
    Value *TrueVal = SI.getTrueValue();
@@ -9424,6 +9560,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
        return FoldI;
    }
  
+  // See if we can fold the select into a phi node if the condition is a select.
+  if (isa<PHINode>(SI.getCondition())) 
+    // The true/false values have to be live in the PHI predecessor's blocks.
+    if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+        CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+      if (Instruction *NV = FoldOpIntoPhi(SI))
+        return NV;
+
    if (BinaryOperator::isNot(CondVal)) {
      SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
      SI.setOperand(1, FalseVal);
@@ -9479,16 +9623,13 @@ static unsigned EnforceKnownAlignment(Value *V,
          Align = PrefAlign;
        }
      }
-  } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
-    // If there is a requested alignment and if this is an alloca, round up.  We
-    // don't do this for malloc, because some systems can't respect the request.
-    if (isa<AllocaInst>(AI)) {
-      if (AI->getAlignment() >= PrefAlign)
-        Align = AI->getAlignment();
-      else {
-        AI->setAlignment(PrefAlign);
-        Align = PrefAlign;
-      }
+  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    // If there is a requested alignment and if this is an alloca, round up.
+    if (AI->getAlignment() >= PrefAlign)
+      Align = AI->getAlignment();
+    else {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
      }
    }
  
@@ -9584,8 +9725,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
    SrcAlign = std::max(SrcAlign, CopyAlign);
    DstAlign = std::max(DstAlign, CopyAlign);
    
-  Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
-  Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
+  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
    Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
    InsertNewInstBefore(L, *MI);
    InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
@@ -9619,7 +9760,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
      const Type *ITy = IntegerType::get(*Context, Len*8);  // n=1 -> i8.
      
      Value *Dest = MI->getDest();
-    Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI);
+    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
  
      // Alignment 0 is identity for alignment 1 for memset, but not store.
      if (Alignment == 0) Alignment = 1;
@@ -9643,6 +9784,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  /// the heavy lifting.
  ///
  Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+  if (isFreeCall(&CI))
+    return visitFree(CI);
+
    // If the caller function is nounwind, mark the call as nounwind, even if the
    // callee isn't.
    if (CI.getParent()->getParent()->doesNotThrow() &&
@@ -9720,9 +9864,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // Turn PPC lvx     -> load if the pointer is known aligned.
      // Turn X86 loadups -> load if the pointer is known aligned.
      if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
-      Value *Ptr = InsertBitCastBefore(II->getOperand(1),
-                                   PointerType::getUnqual(II->getType()),
-                                       CI);
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+                                         PointerType::getUnqual(II->getType()));
        return new LoadInst(Ptr);
      }
      break;
@@ -9732,7 +9875,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
        const Type *OpPtrTy = 
          PointerType::getUnqual(II->getOperand(1)->getType());
-      Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
        return new StoreInst(II->getOperand(1), Ptr);
      }
      break;
@@ -9743,7 +9886,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
        const Type *OpPtrTy = 
          PointerType::getUnqual(II->getOperand(2)->getType());
-      Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
        return new StoreInst(II->getOperand(2), Ptr);
      }
      break;
@@ -9780,8 +9923,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
        
        if (AllEltsOk) {
          // Cast the input vectors to byte vectors.
-        Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
-        Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
+        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
          Value *Result = UndefValue::get(Op0->getType());
          
          // Only extract each element once.
@@ -9828,7 +9971,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      TerminatorInst *TI = II->getParent()->getTerminator();
      bool CannotRemove = false;
      for (++BI; &*BI != TI; ++BI) {
-      if (isa<AllocaInst>(BI)) {
+      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
          CannotRemove = true;
          break;
        }
@@ -9906,9 +10049,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
        // If the call and callee calling conventions don't match, this call must
        // be unreachable, as the call is undefined.
        new StoreInst(ConstantInt::getTrue(*Context),
-                UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))), 
+                UndefValue::get(Type::getInt1PtrTy(*Context)), 
                                    OldCall);
-      if (!OldCall->use_empty())
+      // If OldCall dues not return void then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust itself.
+      if (!OldCall->getType()->isVoidTy())
          OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
        if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
          return EraseInstFromFunction(*OldCall);
@@ -9920,10 +10065,12 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
      // undef so that we know that this code is not reachable, despite the fact
      // that we can't modify the CFG here.
      new StoreInst(ConstantInt::getTrue(*Context),
-               UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))),
+               UndefValue::get(Type::getInt1PtrTy(*Context)),
                    CS.getInstruction());
  
-    if (!CS.getInstruction()->use_empty())
+    // If CS dues not return void then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust itself.
+    if (!CS.getInstruction()->getType()->isVoidTy())
        CS.getInstruction()->
          replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
  
@@ -10002,7 +10149,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  
      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
-        NewRetTy != Type::getVoidTy(*Context) && !CastInst::isCastable(NewRetTy, OldRetTy))
+        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
        return false;   // Cannot transform this return value.
  
      if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
@@ -10134,7 +10281,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    if (Attributes FnAttrs =  CallerPAL.getFnAttributes())
      attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
  
-  if (NewRetTy == Type::getVoidTy(*Context))
+  if (NewRetTy->isVoidTy())
      Caller->setName("");   // Void type should not have a name.
  
    const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
@@ -10160,7 +10307,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    // Insert a cast of the return type as necessary.
    Value *NV = NC;
    if (OldRetTy != NV->getType() && !Caller->use_empty()) {
-    if (NV->getType() != Type::getVoidTy(*Context)) {
+    if (!NV->getType()->isVoidTy()) {
        Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 
                                                              OldRetTy, false);
        NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
@@ -10180,10 +10327,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
      }
    }
  
-  if (Caller->getType() != Type::getVoidTy(*Context) && !Caller->use_empty())
+
+  if (!Caller->use_empty())
      Caller->replaceAllUsesWith(NV);
-  Caller->eraseFromParent();
-  Worklist.Remove(Caller);
+  
+  EraseInstFromFunction(*Caller);
    return true;
  }
  
@@ -10326,7 +10474,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
            setCallingConv(cast<CallInst>(Caller)->getCallingConv());
          cast<CallInst>(NewCaller)->setAttributes(NewPAL);
        }
-      if (Caller->getType() != Type::getVoidTy(*Context) && !Caller->use_empty())
+      if (!Caller->getType()->isVoidTy())
          Caller->replaceAllUsesWith(NewCaller);
        Caller->eraseFromParent();
        Worklist.Remove(Caller);
@@ -10344,8 +10492,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
    return CS.getInstruction();
  }
  
-/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
-/// and if a/b/c/d and the add's all have a single use, turn this into two phi's
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
  /// and a single binop.
  Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
    Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10357,8 +10505,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
    const Type *LHSType = LHSVal->getType();
    const Type *RHSType = RHSVal->getType();
    
-  // Scan to see if all operands are the same opcode, all have one use, and all
-  // kill their operands (i.e. the operands have one use).
+  // Scan to see if all operands are the same opcode, and all have one use.
    for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
      Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
      if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
@@ -10378,6 +10525,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
      if (I->getOperand(0) != LHSVal) LHSVal = 0;
      if (I->getOperand(1) != RHSVal) RHSVal = 0;
    }
+
+  // If both LHS and RHS would need a PHI, don't do this transformation,
+  // because it would increase the number of PHIs entering the block,
+  // which leads to higher register pressure. This is especially
+  // bad when the PHIs are in the header of a loop.
+  if (!LHSVal && !RHSVal)
+    return 0;
    
    // Otherwise, this is safe to transform!
    
@@ -10432,9 +10586,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
    // This is true if all GEP bases are allocas and if all indices into them are
    // constants.
    bool AllBasePointersAreAllocas = true;
+
+  // We don't want to replace this phi if the replacement would require
+  // more than one phi, which leads to higher register pressure. This is
+  // especially bad when the PHIs are in the header of a loop.
+  bool NeededPhi = false;
    
-  // Scan to see if all operands are the same opcode, all have one use, and all
-  // kill their operands (i.e. the operands have one use).
+  // Scan to see if all operands are the same opcode, and all have one use.
    for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
      GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
      if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
@@ -10463,7 +10621,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
        
        if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
          return 0;
+
+      // If we already needed a PHI for an earlier operand, and another operand
+      // also requires a PHI, we'd be introducing more PHIs than we're
+      // eliminating, which increases register pressure on entry to the PHI's
+      // block.
+      if (NeededPhi)
+        return 0;
+
        FixedOperands[op] = 0;  // Needs a PHI.
+      NeededPhi = true;
      }
    }
    
@@ -10509,12 +10676,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
    }
    
    Value *Base = FixedOperands[0];
-  GetElementPtrInst *GEP =
+  return cast<GEPOperator>(FirstInst)->isInBounds() ?
+    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
+                                      FixedOperands.end()) :
      GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
                                FixedOperands.end());
-  if (cast<GEPOperator>(FirstInst)->isInBounds())
-    cast<GEPOperator>(GEP)->setIsInBounds(true);
-  return GEP;
  }
  
  
@@ -10819,8 +10985,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
  
  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    Value *PtrOp = GEP.getOperand(0);
-  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
-  // If so, eliminate the noop.
+  // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops.
    if (GEP.getNumOperands() == 1)
      return ReplaceInstUsesWith(GEP, PtrOp);
  
@@ -10848,13 +11013,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        // to what we need.  If narrower, sign-extend it to what we need.  This
        // explicit cast can make subsequent optimizations more obvious.
        unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
-      
        if (OpBits == PtrSize)
          continue;
        
-      Instruction::CastOps Opc =
-        OpBits > PtrSize ? Instruction::Trunc : Instruction::SExt;
-      *I = InsertCastBefore(Opc, *I, TD->getIntPtrType(GEP.getContext()), GEP);
+      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
        MadeChange = true;
      }
      if (MadeChange) return &GEP;
@@ -10921,28 +11083,34 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        Indices.append(GEP.idx_begin()+1, GEP.idx_end());
      }
  
-    if (!Indices.empty()) {
-      GetElementPtrInst *NewGEP =
+    if (!Indices.empty())
+      return (cast<GEPOperator>(&GEP)->isInBounds() &&
+              Src->isInBounds()) ?
+        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+                                          Indices.end(), GEP.getName()) :
          GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
                                    Indices.end(), GEP.getName());
-      if (cast<GEPOperator>(&GEP)->isInBounds() && Src->isInBounds())
-        cast<GEPOperator>(NewGEP)->setIsInBounds(true);
-      return NewGEP;
-    }
    }
    
    // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
    if (Value *X = getBitCastOperand(PtrOp)) {
      assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
-           
+
+    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't 
+    // want to change the gep until the bitcasts are eliminated.
+    if (getBitCastOperand(X)) {
+      Worklist.AddValue(PtrOp);
+      return 0;
+    }
+    
+    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+    // into     : GEP [10 x i8]* X, i32 0, ...
+    //
+    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+    //           into     : GEP i8* X, ...
+    // 
+    // This occurs when the program declares an array extern like "int X[];"
      if (HasZeroPointerIndex) {
-      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
-      // into     : GEP [10 x i8]* X, i32 0, ...
-      //
-      // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
-      //           into     : GEP i8* X, ...
-      // 
-      // This occurs when the program declares an array extern like "int X[];"
        const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
        const PointerType *XTy = cast<PointerType>(X->getType());
        if (const ArrayType *CATy =
@@ -10951,14 +11119,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
          if (CATy->getElementType() == XTy->getElementType()) {
            // -> GEP i8* X, ...
            SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
-          GetElementPtrInst *NewGEP =
+          return cast<GEPOperator>(&GEP)->isInBounds() ?
+            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
+                                              GEP.getName()) :
              GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
                                        GEP.getName());
-          if (cast<GEPOperator>(&GEP)->isInBounds())
-            cast<GEPOperator>(NewGEP)->setIsInBounds(true);
-          return NewGEP;
-        } else if (const ArrayType *XATy =
-                 dyn_cast<ArrayType>(XTy->getElementType())) {
+        }
+        
+        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
            // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
            if (CATy->getElementType() == XATy->getElementType()) {
              // -> GEP [10 x i8]* X, i32 0, ...
@@ -10983,10 +11151,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
          Value *Idx[2];
          Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
          Idx[1] = GEP.getOperand(1);
-        Value *NewGEP =
+        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
-        if (cast<GEPOperator>(&GEP)->isInBounds())
-          cast<GEPOperator>(NewGEP)->setIsInBounds(true);
          // V and GEP are both pointer types --> BitCast
          return new BitCastInst(NewGEP, GEP.getType());
        }
@@ -11043,9 +11210,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            Value *Idx[2];
            Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
            Idx[1] = NewIdx;
-          Value *NewGEP = Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
-          if (cast<GEPOperator>(&GEP)->isInBounds())
-            cast<GEPOperator>(NewGEP)->setIsInBounds(true);
+          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
+            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
            // The NewGEP must be pointer typed, so must the old one -> BitCast
            return new BitCastInst(NewGEP, GEP.getType());
          }
@@ -11072,7 +11239,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        if (Offset == 0) {
          // If the bitcast is of an allocation, and the allocation will be
          // converted to match the type of the cast, don't touch this.
-        if (isa<AllocationInst>(BCI->getOperand(0))) {
+        if (isa<AllocaInst>(BCI->getOperand(0)) ||
+            isMalloc(BCI->getOperand(0))) {
            // See if the bitcast simplifies, if so, don't nuke this GEP yet.
            if (Instruction *I = visitBitCast(*BCI)) {
              if (I != BCI) {
@@ -11093,10 +11261,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        const Type *InTy =
          cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
        if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) {
-        Value *NGEP = Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
-                                         NewIndices.end());
-        if (cast<GEPOperator>(&GEP)->isInBounds())
-          cast<GEPOperator>(NGEP)->setIsInBounds(true);
+        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+                                     NewIndices.end()) :
+          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+                             NewIndices.end());
          
          if (NGEP->getType() == GEP.getType())
            return ReplaceInstUsesWith(GEP, NGEP);
@@ -11109,28 +11278,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    return 0;
  }
  
-Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
    // Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
    if (AI.isArrayAllocation()) {  // Check C != 1
      if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
        const Type *NewTy = 
          ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
-      AllocationInst *New = 0;
-
-      // Create and insert the replacement instruction...
-      if (isa<MallocInst>(AI))
-        New = Builder->CreateMalloc(NewTy, 0, AI.getName());
-      else {
-        assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
-        New = Builder->CreateAlloca(NewTy, 0, AI.getName());
-      }
+      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
        New->setAlignment(AI.getAlignment());
  
        // Scan to the end of the allocation instructions, to skip over a block of
        // allocas if possible...also skip interleaved debug info
        //
        BasicBlock::iterator It = New;
-      while (isa<AllocationInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
  
        // Now that I is pointing to the first non-allocation-inst in the block,
        // insert our getelementptr instruction...
@@ -11139,9 +11301,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
        Value *Idx[2];
        Idx[0] = NullIdx;
        Idx[1] = NullIdx;
-      Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
-                                           New->getName()+".sub", It);
-      cast<GEPOperator>(V)->setIsInBounds(true);
+      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+                                                   New->getName()+".sub", It);
  
        // Now make everything use the getelementptr instead of the original
        // allocation.
@@ -11166,14 +11327,14 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
    return 0;
  }
  
-Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
-  Value *Op = FI.getOperand(0);
+Instruction *InstCombiner::visitFree(Instruction &FI) {
+  Value *Op = FI.getOperand(1);
  
    // free undef -> unreachable.
    if (isa<UndefValue>(Op)) {
      // Insert a new store to null because we cannot modify the CFG here.
      new StoreInst(ConstantInt::getTrue(*Context),
-           UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))), &FI);
+           UndefValue::get(Type::getInt1PtrTy(*Context)), &FI);
      return EraseInstFromFunction(FI);
    }
    
@@ -11181,33 +11342,26 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
    // when lots of inlining happens.
    if (isa<ConstantPointerNull>(Op))
      return EraseInstFromFunction(FI);
-  
-  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
-  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
-    FI.setOperand(0, CI->getOperand(0));
-    return &FI;
-  }
-  
-  // Change free (gep X, 0,0,0,0) into free(X)
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
-    if (GEPI->hasAllZeroIndices()) {
-      Worklist.Add(GEPI);
-      FI.setOperand(0, GEPI->getOperand(0));
-      return &FI;
-    }
-  }
-  
-  // Change free(malloc) into nothing, if the malloc has a single use.
-  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
-    if (MI->hasOneUse()) {
-      EraseInstFromFunction(FI);
-      return EraseInstFromFunction(*MI);
+
+  // If we have a malloc call whose only use is a free call, delete both.
+  if (isMalloc(Op))
+    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+      if (Op->hasOneUse() && CI->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        EraseInstFromFunction(*CI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    } else {
+      // Op is a call to malloc
+      if (Op->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
      }
  
    return 0;
  }
  
-
  /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
  static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                          const TargetData *TD) {
@@ -11215,40 +11369,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
    Value *CastOp = CI->getOperand(0);
    LLVMContext *Context = IC.getContext();
  
-  if (TD) {
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
-      // Instead of loading constant c string, use corresponding integer value
-      // directly if string length is small enough.
-      std::string Str;
-      if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
-        unsigned len = Str.length();
-        const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
-        unsigned numBits = Ty->getPrimitiveSizeInBits();
-        // Replace LI with immediate integer store.
-        if ((numBits >> 3) == len + 1) {
-          APInt StrVal(numBits, 0);
-          APInt SingleChar(numBits, 0);
-          if (TD->isLittleEndian()) {
-            for (signed i = len-1; i >= 0; i--) {
-              SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
-              StrVal = (StrVal << 8) | SingleChar;
-            }
-          } else {
-            for (unsigned i = 0; i < len; i++) {
-              SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
-              StrVal = (StrVal << 8) | SingleChar;
-            }
-            // Append NULL at the end.
-            SingleChar = 0;
-            StrVal = (StrVal << 8) | SingleChar;
-          }
-          Value *NL = ConstantInt::get(*Context, StrVal);
-          return IC.ReplaceInstUsesWith(LI, NL);
-        }
-      }
-    }
-  }
-
    const PointerType *DestTy = cast<PointerType>(CI->getType());
    const Type *DestPTy = DestTy->getElementType();
    if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
@@ -11268,7 +11388,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
          if (Constant *CSrc = dyn_cast<Constant>(CastOp))
            if (ASrcTy->getNumElements() != 0) {
              Value *Idxs[2];
-            Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context));
+            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+            Idxs[1] = Idxs[0];
              CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
              SrcTy = cast<PointerType>(CastOp->getType());
              SrcPTy = SrcTy->getElementType();
@@ -11309,7 +11430,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
        LI.setAlignment(KnownAlign);
    }
  
-  // load (cast X) --> cast (load X) iff safe
+  // load (cast X) --> cast (load X) iff safe.
    if (isa<CastInst>(Op))
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;
@@ -11324,11 +11445,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
      return ReplaceInstUsesWith(LI, AvailableVal);
  
+  // load(gep null, ...) -> unreachable
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
      const Value *GEPI0 = GEPI->getOperand(0);
      // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<ConstantPointerNull>(GEPI0) &&
-        cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
+    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
        // Insert a new store to null instruction before the load to indicate
        // that this code is not reachable.  We do this instead of inserting
        // an unreachable instruction directly because we cannot modify the
@@ -11339,61 +11460,24 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
      }
    } 
  
-  if (Constant *C = dyn_cast<Constant>(Op)) {
-    // load null/undef -> undef
-    // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<UndefValue>(C) || (C->isNullValue() && 
-        cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
-      // Insert a new store to null instruction before the load to indicate that
-      // this code is not reachable.  We do this instead of inserting an
-      // unreachable instruction directly because we cannot modify the CFG.
-      new StoreInst(UndefValue::get(LI.getType()),
-                    Constant::getNullValue(Op->getType()), &LI);
-      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
-    }
-
-    // Instcombine load (constant global) into the value loaded.
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op))
-      if (GV->isConstant() && GV->hasDefinitiveInitializer())
-        return ReplaceInstUsesWith(LI, GV->getInitializer());
-
-    // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
-      if (CE->getOpcode() == Instruction::GetElementPtr) {
-        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
-          if (GV->isConstant() && GV->hasDefinitiveInitializer())
-            if (Constant *V = 
-               ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE, 
-                                                      *Context))
-              return ReplaceInstUsesWith(LI, V);
-        if (CE->getOperand(0)->isNullValue()) {
-          // Insert a new store to null instruction before the load to indicate
-          // that this code is not reachable.  We do this instead of inserting
-          // an unreachable instruction directly because we cannot modify the
-          // CFG.
-          new StoreInst(UndefValue::get(LI.getType()),
-                        Constant::getNullValue(Op->getType()), &LI);
-          return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
-        }
-
-      } else if (CE->isCast()) {
-        if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
-          return Res;
-      }
-    }
-  }
-    
-  // If this load comes from anywhere in a constant global, and if the global
-  // is all undef or zero, we know what it loads.
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
-    if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
-      if (GV->getInitializer()->isNullValue())
-        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
-      else if (isa<UndefValue>(GV->getInitializer()))
-        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
-    }
+  // load null/undef -> unreachable
+  // TODO: Consider a target hook for valid address spaces for this xform.
+  if (isa<UndefValue>(Op) ||
+      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+    // Insert a new store to null instruction before the load to indicate that
+    // this code is not reachable.  We do this instead of inserting an
+    // unreachable instruction directly because we cannot modify the CFG.
+    new StoreInst(UndefValue::get(LI.getType()),
+                  Constant::getNullValue(Op->getType()), &LI);
+    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  
+  // Instcombine load (constantexpr_cast global) -> cast (load global)
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+    if (CE->isCast())
+      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+        return Res;
+  
    if (Op->hasOneUse()) {
      // Change select and PHI nodes to select values instead of addresses: this
      // helps alias analysis out a lot, allows many others simplifications, and
@@ -11511,11 +11595,9 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
    
    // SIOp0 is a pointer to aggregate and this is a store to the first field,
    // emit a GEP to index into its first field.
-  if (!NewGEPIndices.empty()) {
-    CastOp = IC.Builder->CreateGEP(CastOp, NewGEPIndices.begin(),
-                                   NewGEPIndices.end());
-    cast<GEPOperator>(CastOp)->setIsInBounds(true);
-  }
+  if (!NewGEPIndices.empty())
+    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+                                           NewGEPIndices.end());
    
    NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                     SIOp0->getName()+".c");
@@ -11678,8 +11760,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
  
    // store X, null    -> turns into 'unreachable' in SimplifyCFG
-  if (isa<ConstantPointerNull>(Ptr) &&
-      cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
      if (!isa<UndefValue>(Val)) {
        SI.setOperand(0, UndefValue::get(Val->getType()));
        if (Instruction *U = dyn_cast<Instruction>(Val))
@@ -12115,7 +12196,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
      // that element. When the elements are not identical, we cannot replace yet
      // (we do that below, but only when the index is constant).
      Constant *op0 = C->getOperand(0);
-    for (unsigned i = 1; i < C->getNumOperands(); ++i)
+    for (unsigned i = 1; i != C->getNumOperands(); ++i)
        if (C->getOperand(i) != op0) {
          op0 = 0; 
          break;
@@ -12128,8 +12209,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    // find a previously computed scalar that was inserted into the vector.
    if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
      unsigned IndexVal = IdxC->getZExtValue();
-    unsigned VectorWidth = 
-      cast<VectorType>(EI.getOperand(0)->getType())->getNumElements();
+    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
        
      // If this is extracting an invalid index, turn this into undef, to avoid
      // crashing the code below.
@@ -12166,46 +12246,26 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    }
    
    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
-    if (I->hasOneUse()) {
-      // Push extractelement into predecessor operation if legal and
-      // profitable to do so
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-        bool isConstantElt = isa<ConstantInt>(EI.getOperand(1));
-        if (CheapToScalarize(BO, isConstantElt)) {
-          Value *newEI0 =
-            Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
-                                          EI.getName()+".lhs");
-          Value *newEI1 =
-            Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
-                                          EI.getName()+".rhs");
-          return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
-        }
-      } else if (isa<LoadInst>(I)) {
-        unsigned AS = 
-          cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
-        Value *Ptr = InsertBitCastBefore(I->getOperand(0),
-                                  PointerType::get(EI.getType(), AS),*I);
-        Value *GEP =
-          Builder->CreateGEP(Ptr, EI.getOperand(1), I->getName()+".gep");
-        cast<GEPOperator>(GEP)->setIsInBounds(true);
-        
-        LoadInst *Load = Builder->CreateLoad(GEP, "tmp");
-
-        // Make sure the Load goes before the load instruction in the source,
-        // not wherever the extract happens to be.
-        Load->moveBefore(I);
-        
-        return ReplaceInstUsesWith(EI, Load);
-      }
-    }
-    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+    // Push extractelement into predecessor operation if legal and
+    // profitable to do so
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+      if (I->hasOneUse() &&
+          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+        Value *newEI0 =
+          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                        EI.getName()+".lhs");
+        Value *newEI1 =
+          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                        EI.getName()+".rhs");
+        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+      }
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
        // Extracting the inserted element?
        if (IE->getOperand(2) == EI.getOperand(1))
          return ReplaceInstUsesWith(EI, IE->getOperand(1));
        // If the inserted and extracted elements are constants, they must not
        // be the same value, extract from the pre-inserted value instead.
-      if (isa<Constant>(IE->getOperand(2)) &&
-          isa<Constant>(EI.getOperand(1))) {
+      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
          Worklist.AddValue(EI.getOperand(0));
          EI.setOperand(0, IE->getOperand(0));
          return &EI;
@@ -12228,7 +12288,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
            return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
          }
          return ExtractElementInst::Create(Src,
-                         ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, false));
+                         ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx,
+                                          false));
        }
      }
      // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
@@ -12404,28 +12465,6 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
        if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
          return ReplaceInstUsesWith(IE, VecOp);      
        
-      // We could theoretically do this for ANY input.  However, doing so could
-      // turn chains of insertelement instructions into a chain of shufflevector
-      // instructions, and right now we do not merge shufflevectors.  As such,
-      // only do this in a situation where it is clear that there is benefit.
-      if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
-        // Turn this into shuffle(EIOp0, VecOp, Mask).  The result has all of
-        // the values of VecOp, except then one read from EIOp0.
-        // Build a new shuffle mask.
-        std::vector<Constant*> Mask;
-        if (isa<UndefValue>(VecOp))
-          Mask.assign(NumVectorElts, UndefValue::get(Type::getInt32Ty(*Context)));
-        else {
-          assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
-          Mask.assign(NumVectorElts, ConstantInt::get(Type::getInt32Ty(*Context),
-                                                       NumVectorElts));
-        } 
-        Mask[InsertedIdx] = 
-                           ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
-        return new ShuffleVectorInst(EI->getOperand(0), VecOp,
-                                     ConstantVector::get(Mask));
-      }
-      
        // If this insertelement isn't used by some other insertelement, turn it
        // (and any insertelements it points to), into one big shuffle.
        if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
@@ -12611,13 +12650,19 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  /// many instructions are dead or constant).  Additionally, if we find a branch
  /// whose condition is a known constant, we only visit the reachable successors.
  ///
-static void AddReachableCodeToWorklist(BasicBlock *BB, 
+static bool AddReachableCodeToWorklist(BasicBlock *BB, 
                                         SmallPtrSet<BasicBlock*, 64> &Visited,
                                         InstCombiner &IC,
                                         const TargetData *TD) {
+  bool MadeIRChange = false;
    SmallVector<BasicBlock*, 256> Worklist;
    Worklist.push_back(BB);
+  
+  std::vector<Instruction*> InstrsForInstCombineWorklist;
+  InstrsForInstCombineWorklist.reserve(128);
  
+  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+  
    while (!Worklist.empty()) {
      BB = Worklist.back();
      Worklist.pop_back();
@@ -12625,7 +12670,6 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
      // We have now visited this block!  If we've already been here, ignore it.
      if (!Visited.insert(BB)) continue;
  
-    DbgInfoIntrinsic *DBI_Prev = NULL;
      for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
        Instruction *Inst = BBI++;
        
@@ -12638,32 +12682,40 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
        }
        
        // ConstantProp instruction if trivially constant.
-      if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
-        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
-                     << *Inst << '\n');
-        Inst->replaceAllUsesWith(C);
-        ++NumConstProp;
-        Inst->eraseFromParent();
-        continue;
-      }
-     
-      // If there are two consecutive llvm.dbg.stoppoint calls then
-      // it is likely that the optimizer deleted code in between these
-      // two intrinsics. 
-      DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst);
-      if (DBI_Next) {
-        if (DBI_Prev
-            && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint
-            && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) {
-          IC.Worklist.Remove(DBI_Prev);
-          DBI_Prev->eraseFromParent();
+      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+        if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+                       << *Inst << '\n');
+          Inst->replaceAllUsesWith(C);
+          ++NumConstProp;
+          Inst->eraseFromParent();
+          continue;
+        }
+      
+      
+      
+      if (TD) {
+        // See if we can constant fold its operands.
+        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+             i != e; ++i) {
+          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+          if (CE == 0) continue;
+          
+          // If we already folded this constant, don't try again.
+          if (!FoldedConstants.insert(CE))
+            continue;
+          
+          Constant *NewC =
+            ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+          if (NewC && NewC != CE) {
+            *i = NewC;
+            MadeIRChange = true;
+          }
          }
-        DBI_Prev = DBI_Next;
-      } else {
-        DBI_Prev = 0;
        }
+      
  
-      IC.Worklist.Add(Inst);
+      InstrsForInstCombineWorklist.push_back(Inst);
      }
  
      // Recursively visit successors.  If this is a branch or switch on a
@@ -12695,11 +12747,20 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
      for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
        Worklist.push_back(TI->getSuccessor(i));
    }
+  
+  // Once we've found all of the instructions to add to instcombine's worklist,
+  // add them in reverse order.  This way instcombine will visit from the top
+  // of the function down.  This jives well with the way that it adds all uses
+  // of instructions to the worklist after doing a transformation, thus avoiding
+  // some N^2 behavior in pathological cases.
+  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+                              InstrsForInstCombineWorklist.size());
+  
+  return MadeIRChange;
  }
  
  bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
-  bool Changed = false;
-  TD = getAnalysisIfAvailable<TargetData>();
+  MadeIRChange = false;
    
    DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
          << F.getNameStr() << "\n");
@@ -12709,7 +12770,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
      // the reachable instructions.  Ignore blocks that are not reachable.  Keep
      // track of which blocks we visit.
      SmallPtrSet<BasicBlock*, 64> Visited;
-    AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
  
      // Do a quick scan over the function.  If we find any blocks that are
      // unreachable, remove any instructions inside of them.  This prevents
@@ -12725,9 +12786,12 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
            // going to do one without it.
            if (!isa<DbgInfoIntrinsic>(I)) {
              ++NumDeadInst;
-            Changed = true;
+            MadeIRChange = true;
            }
-          if (!I->use_empty())
+
+          // If I is not void type then replaceAllUsesWith undef.
+          // This allows ValueHandlers and custom metadata to adjust itself.
+          if (!I->getType()->isVoidTy())
              I->replaceAllUsesWith(UndefValue::get(I->getType()));
            I->eraseFromParent();
          }
@@ -12743,38 +12807,35 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
        DEBUG(errs() << "IC: DCE: " << *I << '\n');
        EraseInstFromFunction(*I);
        ++NumDeadInst;
-      Changed = true;
+      MadeIRChange = true;
        continue;
      }
  
      // Instruction isn't dead, see if we can constant propagate it.
-    if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
-      DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+      if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
  
-      // Add operands to the worklist.
-      ReplaceInstUsesWith(*I, C);
-      ++NumConstProp;
-      EraseInstFromFunction(*I);
-      Changed = true;
-      continue;
-    }
-
-    if (TD) {
-      // See if we can constant fold its operands.
-      for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
-          if (Constant *NewC = ConstantFoldConstantExpression(CE,   
-                                  F.getContext(), TD))
-            if (NewC != CE) {
-              i->set(NewC);
-              Changed = true;
-            }
-    }
+        // Add operands to the worklist.
+        ReplaceInstUsesWith(*I, C);
+        ++NumConstProp;
+        EraseInstFromFunction(*I);
+        MadeIRChange = true;
+        continue;
+      }
  
      // See if we can trivially sink this instruction to a successor basic block.
      if (I->hasOneUse()) {
        BasicBlock *BB = I->getParent();
-      BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
+      Instruction *UserInst = cast<Instruction>(I->use_back());
+      BasicBlock *UserParent;
+      
+      // Get the block the use occurs in.
+      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+      else
+        UserParent = UserInst->getParent();
+      
        if (UserParent != BB) {
          bool UserIsSuccessor = false;
          // See if the user is one of our successors.
@@ -12787,10 +12848,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
          // If the user is one of our immediate successors, and if that successor
          // only has us as a predecessors (we'd have to split the critical edge
          // otherwise), we can keep going.
-        if (UserIsSuccessor && !isa<PHINode>(I->use_back()) &&
-            next(pred_begin(UserParent)) == pred_end(UserParent))
+        if (UserIsSuccessor && UserParent->getSinglePredecessor())
            // Okay, the CFG is simple enough, try to sink this instruction.
-          Changed |= TryToSinkInstruction(I, UserParent);
+          MadeIRChange |= TryToSinkInstruction(I, UserParent);
        }
      }
  
@@ -12801,7 +12861,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
      std::string OrigI;
  #endif
      DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
-    
+    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
      if (Instruction *Result = visit(*I)) {
        ++NumCombined;
        // Should we replace the old instruction with a new one?
@@ -12845,24 +12906,25 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
            Worklist.AddUsersToWorkList(*I);
          }
        }
-      Changed = true;
+      MadeIRChange = true;
      }
    }
  
    Worklist.Zap();
-  return Changed;
+  return MadeIRChange;
  }
  
  
  bool InstCombiner::runOnFunction(Function &F) {
    MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
    Context = &F.getContext();
-  
+  TD = getAnalysisIfAvailable<TargetData>();
+
    
    /// Builder - This is an IRBuilder that automatically inserts new
    /// instructions into the worklist when they are created.
-  IRBuilder<true, ConstantFolder, InstCombineIRInserter> 
-    TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+  IRBuilder<true, TargetFolder, InstCombineIRInserter> 
+    TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
                 InstCombineIRInserter(Worklist));
    Builder = &TheBuilder;