Remember to actually update SplitAnalysis statistics now that we have a fancy

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineCasts.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 49b2e22a6815424eca406ea808bfeb7b63447730..dbe5200d0009fa1371d33897089e5a5c2a252c31 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -22,19 +22,18 @@ using namespace PatternMatch;
  /// X*Scale+Offset.
  ///
  static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        int &Offset) {
-  assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!");
+                                        uint64_t &Offset) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
      Offset = CI->getZExtValue();
      Scale  = 0;
-    return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
+    return ConstantInt::get(Val->getType(), 0);
    }
    
    if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
      if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
        if (I->getOpcode() == Instruction::Shl) {
          // This is a value scaled by '1 << the shift amt'.
-        Scale = 1U << RHS->getZExtValue();
+        Scale = UINT64_C(1) << RHS->getZExtValue();
          Offset = 0;
          return I->getOperand(0);
        }
@@ -100,7 +99,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    // See if we can satisfy the modulus by pulling a scale out of the array
    // size argument.
    unsigned ArraySizeScale;
-  int ArrayOffset;
+  uint64_t ArrayOffset;
    Value *NumElements = // See if the array size is a decomposable linear expr.
      DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
   
@@ -114,13 +113,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    if (Scale == 1) {
      Amt = NumElements;
    } else {
-    Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale);
+    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
      // Insert before the alloca, not before the cast.
      Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
    }
    
-  if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()),
+  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                    Offset, true);
      Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
    }
@@ -255,17 +254,26 @@ isEliminableCastPair(
    return Instruction::CastOps(Res);
  }
  
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated.  It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                                     const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+                                      const Type *Ty) {
+  // Noop casts and casts of constants should be eliminated trivially.
    if (V->getType() == Ty || isa<Constant>(V)) return false;
    
-  // If this is another cast that can be eliminated, it isn't codegen either.
+  // If this is another cast that can be eliminated, we prefer to have it
+  // eliminated.
    if (const CastInst *CI = dyn_cast<CastInst>(V))
-    if (isEliminableCastPair(CI, opcode, Ty, TD))
+    if (isEliminableCastPair(CI, opc, Ty, TD))
        return false;
+  
+  // If this is a vector sext from a compare, then we don't want to break the
+  // idiom where each element of the extended vector is either zero or all ones.
+  if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+    return false;
+  
    return true;
  }
  
@@ -294,8 +302,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    if (isa<PHINode>(Src)) {
      // We don't do this if this would create a PHI node with an illegal type if
      // it is currently legal.
-    if (!isa<IntegerType>(Src->getType()) ||
-        !isa<IntegerType>(CI.getType()) ||
+    if (!Src->getType()->isIntegerTy() ||
+        !CI.getType()->isIntegerTy() ||
          ShouldChangeType(CI.getType(), Src->getType()))
        if (Instruction *NV = FoldOpIntoPhi(CI))
          return NV;
@@ -313,6 +321,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
  /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
  /// makes sense if x and y can be efficiently truncated.
  ///
+/// This function works on both vectors and scalars.
+///
  static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
    // We can always evaluate constants in another type.
    if (isa<Constant>(V))
@@ -323,7 +333,8 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
    
    const Type *OrigTy = V->getType();
    
-  // If this is an extension from the dest type, we can eliminate it.
+  // If this is an extension from the dest type, we can eliminate it, even if it
+  // has multiple uses.
    if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 
        I->getOperand(0)->getType() == Ty)
      return true;
@@ -424,13 +435,13 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    // type.   Only do this if the dest type is a simple type, don't convert the
    // expression tree to something weird like i93 unless the source is also
    // strange.
-  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+  if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
        CanEvaluateTruncated(Src, DestTy)) {
        
      // If this cast is a truncate, evaluting in a different type always
      // eliminates the cast, so it is always a win.
      DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid cast: " << CI);
+          " to avoid cast: " << CI << '\n');
      Value *Res = EvaluateInDifferentType(Src, DestTy, false);
      assert(Res->getType() == DestTy);
      return ReplaceInstUsesWith(CI, Res);
@@ -578,10 +589,25 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
  }
  
  /// CanEvaluateZExtd - Determine if the specified value can be computed in the
-/// specified wider type and produce the same low bits.  If not, return -1.  If
-/// it is possible, return the number of high bits that are known to be zero in
-/// the promoted value.
-static bool CanEvaluateZExtd(Value *V, const Type *Ty, const TargetData *TD) {
+/// specified wider type and produce the same low bits.  If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out.  For example, to promote something like:
+///
+///   %B = trunc i64 %A to i32
+///   %C = lshr i32 %B, 8
+///   %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63.  Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+  BitsToClear = 0;
    if (isa<Constant>(V))
      return true;
    
@@ -589,15 +615,17 @@ static bool CanEvaluateZExtd(Value *V, const Type *Ty, const TargetData *TD) {
    if (!I) return false;
    
    // If the input is a truncate from the destination type, we can trivially
-  // eliminate it.
-  if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+  // eliminate it, even if it has multiple uses.
+  // FIXME: This is currently disabled until codegen can handle this without
+  // pessimizing code, PR5997.
+  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
      return true;
    
    // We can't extend or shrink something that has multiple uses: doing so would
    // require duplicating the instruction in general, which isn't profitable.
    if (!I->hasOneUse()) return false;
    
-  unsigned Opc = I->getOpcode();
+  unsigned Opc = I->getOpcode(), Tmp;
    switch (Opc) {
    case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
    case Instruction::SExt:  // zext(sext(x)) -> sext(x).
@@ -610,23 +638,64 @@ static bool CanEvaluateZExtd(Value *V, const Type *Ty, const TargetData *TD) {
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::Shl:
-    return CanEvaluateZExtd(I->getOperand(0), Ty, TD) &&
-           CanEvaluateZExtd(I->getOperand(1), Ty, TD);
+    if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+        !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+      return false;
+    // These can all be promoted if neither operand has 'bits to clear'.
+    if (BitsToClear == 0 && Tmp == 0)
+      return true;
+      
+    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+    // other side, BitsToClear is ok.
+    if (Tmp == 0 &&
+        (Opc == Instruction::And || Opc == Instruction::Or ||
+         Opc == Instruction::Xor)) {
+      // We use MaskedValueIsZero here for generality, but the case we care
+      // about the most is constant RHS.
+      unsigned VSize = V->getType()->getScalarSizeInBits();
+      if (MaskedValueIsZero(I->getOperand(1),
+                            APInt::getHighBitsSet(VSize, BitsToClear)))
+        return true;
+    }
        
-  //case Instruction::LShr:
+    // Otherwise, we don't know how to analyze this BitsToClear case yet.
+    return false;
        
+  case Instruction::LShr:
+    // We can promote lshr(x, cst) if we can promote x.  This requires the
+    // ultimate 'and' to clear out the high zero bits we're clearing out though.
+    if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+        return false;
+      BitsToClear += Amt->getZExtValue();
+      if (BitsToClear > V->getType()->getScalarSizeInBits())
+        BitsToClear = V->getType()->getScalarSizeInBits();
+      return true;
+    }
+    // Cannot promote variable LSHR.
+    return false;
    case Instruction::Select:
-    return CanEvaluateZExtd(I->getOperand(1), Ty, TD) &&
-           CanEvaluateZExtd(I->getOperand(2), Ty, TD);
+    if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+        !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+        // TODO: If important, we could handle the case when the BitsToClear are
+        // known zero in the disagreeing side.
+        Tmp != BitsToClear)
+      return false;
+    return true;
        
    case Instruction::PHI: {
      // We can change a phi if we can change all operands.  Note that we never
      // get into trouble with cyclic PHIs here because we only consider
      // instructions with a single use.
      PHINode *PN = cast<PHINode>(I);
-    if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, TD)) return false;
+    if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+      return false;
      for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, TD)) return false;
+      if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+          // TODO: If important, we could handle the case when the BitsToClear
+          // are known zero in the disagreeing input.
+          Tmp != BitsToClear)
+        return false;
      return true;
    }
    default:
@@ -657,25 +726,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
    // type.   Only do this if the dest type is a simple type, don't convert the
    // expression tree to something weird like i93 unless the source is also
    // strange.
-  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateZExtd(Src, DestTy, TD)) { 
+  unsigned BitsToClear;
+  if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateZExtd(Src, DestTy, BitsToClear)) { 
+    assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+           "Unreasonable BitsToClear");
+    
      // Okay, we can transform this!  Insert the new expression now.
      DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
            " to avoid zero extend: " << CI);
      Value *Res = EvaluateInDifferentType(Src, DestTy, false);
      assert(Res->getType() == DestTy);
      
+    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+    uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+    
      // If the high bits are already filled with zeros, just replace this
      // cast with the result.
-    uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
-    uint32_t DestBitSize = DestTy->getScalarSizeInBits();
      if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
-                                                     DestBitSize-SrcBitSize)))
+                                                     DestBitSize-SrcBitsKept)))
        return ReplaceInstUsesWith(CI, Res);
      
      // We need to emit an AND to clear the high bits.
      Constant *C = ConstantInt::get(Res->getType(),
-                               APInt::getLowBitsSet(DestBitSize, SrcBitSize));
+                               APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
      return BinaryOperator::CreateAnd(Res, C);
    }
  
@@ -762,7 +836,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
  
    // zext (xor i1 X, true) to i32  --> xor (zext i1 X to i32), 1
    Value *X;
-  if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) &&
+  if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
        match(SrcI, m_Not(m_Value(X))) &&
        (!X->hasOneUse() || !isa<CmpInst>(X))) {
      Value *New = Builder->CreateZExt(X, CI.getType());
@@ -780,7 +854,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
  ///
  /// This function works on both vectors and scalars.
  ///
-static bool CanEvaluateSExtd(Value *V, const Type *Ty, TargetData *TD) {
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
    assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
           "Can't sign extend type to a smaller type");
    // If this is a constant, it can be trivially promoted.
@@ -790,8 +864,11 @@ static bool CanEvaluateSExtd(Value *V, const Type *Ty, TargetData *TD) {
    Instruction *I = dyn_cast<Instruction>(V);
    if (!I) return false;
    
-  // If this is a truncate from the dest type, we can trivially eliminate it.
-  if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+  // If this is a truncate from the dest type, we can trivially eliminate it,
+  // even if it has multiple uses.
+  // FIXME: This is currently disabled until codegen can handle this without
+  // pessimizing code, PR5997.
+  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
      return true;
    
    // We can't extend or shrink something that has multiple uses: doing so would
@@ -810,16 +887,15 @@ static bool CanEvaluateSExtd(Value *V, const Type *Ty, TargetData *TD) {
    case Instruction::Sub:
    case Instruction::Mul:
      // These operators can all arbitrarily be extended if their inputs can.
-    return CanEvaluateSExtd(I->getOperand(0), Ty, TD) &&
-           CanEvaluateSExtd(I->getOperand(1), Ty, TD);
+    return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+           CanEvaluateSExtd(I->getOperand(1), Ty);
        
    //case Instruction::Shl:   TODO
    //case Instruction::LShr:  TODO
-  //case Instruction::Trunc: TODO
        
    case Instruction::Select:
-    return CanEvaluateSExtd(I->getOperand(1), Ty, TD) &&
-           CanEvaluateSExtd(I->getOperand(2), Ty, TD);
+    return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+           CanEvaluateSExtd(I->getOperand(2), Ty);
        
    case Instruction::PHI: {
      // We can change a phi if we can change all operands.  Note that we never
@@ -827,7 +903,7 @@ static bool CanEvaluateSExtd(Value *V, const Type *Ty, TargetData *TD) {
      // instructions with a single use.
      PHINode *PN = cast<PHINode>(I);
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty, TD)) return false;
+      if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
      return true;
    }
    default:
@@ -855,18 +931,12 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
    Value *Src = CI.getOperand(0);
    const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
  
-  // Canonicalize sign-extend from i1 to a select.
-  if (Src->getType()->isInteger(1))
-    return SelectInst::Create(Src,
-                              Constant::getAllOnesValue(CI.getType()),
-                              Constant::getNullValue(CI.getType()));
-  
    // Attempt to extend the entire input expression tree to the destination
    // type.   Only do this if the dest type is a simple type, don't convert the
    // expression tree to something weird like i93 unless the source is also
    // strange.
-  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateSExtd(Src, DestTy, TD)) {
+  if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateSExtd(Src, DestTy)) {
      // Okay, we can transform this!  Insert the new expression now.
      DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
            " to avoid sign extend: " << CI);
@@ -887,6 +957,43 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
                                        ShAmt);
    }
  
+  // If this input is a trunc from our destination, then turn sext(trunc(x))
+  // into shifts.
+  if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+    if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+      uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+      uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+      
+      // We need to emit a shl + ashr to do the sign extend.
+      Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+      Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+      return BinaryOperator::CreateAShr(Res, ShAmt);
+    }
+  
+  
+  // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
+  // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
+  {
+  ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
+  if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
+    // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
+    // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+    if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
+        (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
+      Value *Sh = ConstantInt::get(CmpLHS->getType(),
+                                   CmpLHS->getType()->getScalarSizeInBits()-1);
+      Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
+      if (In->getType() != CI.getType())
+        In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+      
+      if (Pred == ICmpInst::ICMP_SGT)
+        In = Builder->CreateNot(In, In->getName()+".not");
+      return ReplaceInstUsesWith(CI, In);
+    }
+  }
+  }
+  
+  
    // If the input is a shl/ashr pair of a same constant, then this is a sign
    // extension from a smaller value.  If we could trust arbitrary bitwidth
    // integers, we could turn this into a truncate to the smaller bit and then
@@ -990,6 +1097,33 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
        break;  
      }
    }
+  
+  // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+  // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+  CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+  if (Call && Call->getCalledFunction() &&
+      Call->getCalledFunction()->getName() == "sqrt" &&
+      Call->getNumArgOperands() == 1) {
+    CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+    if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+        CI.getType()->isFloatTy() &&
+        Call->getType()->isDoubleTy() &&
+        Arg->getType()->isDoubleTy() &&
+        Arg->getOperand(0)->getType()->isFloatTy()) {
+      Function *Callee = Call->getCalledFunction();
+      Module *M = CI.getParent()->getParent()->getParent();
+      Constant* SqrtfFunc = M->getOrInsertFunction("sqrtf", 
+                                                   Callee->getAttributes(),
+                                                   Builder->getFloatTy(),
+                                                   Builder->getFloatTy(),
+                                                   NULL);
+      CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+                                       "sqrtfcall");
+      ret->setAttributes(Callee->getAttributes());
+      return ret;
+    }
+  }
+  
    return 0;
  }
  
@@ -1046,16 +1180,22 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
  }
  
  Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
-  // If the source integer type is larger than the intptr_t type for
-  // this target, do a trunc to the intptr_t type, then inttoptr of it.  This
-  // allows the trunc to be exposed to other transforms.  Don't do this for
-  // extending inttoptr's, because we don't know if the target sign or zero
-  // extends to pointers.
-  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
-      TD->getPointerSizeInBits()) {
-    Value *P = Builder->CreateTrunc(CI.getOperand(0),
-                                    TD->getIntPtrType(CI.getContext()), "tmp");
-    return new IntToPtrInst(P, CI.getType());
+  // If the source integer type is not the intptr_t type for this target, do a
+  // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
+  // cast to be exposed to other transforms.
+  if (TD) {
+    if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+        TD->getPointerSizeInBits()) {
+      Value *P = Builder->CreateTrunc(CI.getOperand(0),
+                                      TD->getIntPtrType(CI.getContext()), "tmp");
+      return new IntToPtrInst(P, CI.getType());
+    }
+    if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
+        TD->getPointerSizeInBits()) {
+      Value *P = Builder->CreateZExt(CI.getOperand(0),
+                                     TD->getIntPtrType(CI.getContext()), "tmp");
+      return new IntToPtrInst(P, CI.getType());
+    }
    }
    
    if (Instruction *I = commonCastTransforms(CI))
@@ -1117,22 +1257,85 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
  }
  
  Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
-  // If the destination integer type is smaller than the intptr_t type for
-  // this target, do a ptrtoint to intptr_t then do a trunc.  This allows the
-  // trunc to be exposed to other transforms.  Don't do this for extending
-  // ptrtoint's, because we don't know if the target sign or zero extends its
-  // pointers.
-  if (TD &&
-      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
-    Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
-                                       TD->getIntPtrType(CI.getContext()),
-                                       "tmp");
-    return new TruncInst(P, CI.getType());
+  // If the destination integer type is not the intptr_t type for this target,
+  // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
+  // to be exposed to other transforms.
+  if (TD) {
+    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+      Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+                                         TD->getIntPtrType(CI.getContext()),
+                                         "tmp");
+      return new TruncInst(P, CI.getType());
+    }
+    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+      Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+                                         TD->getIntPtrType(CI.getContext()),
+                                         "tmp");
+      return new ZExtInst(P, CI.getType());
+    }
    }
    
    return commonPointerCastTransforms(CI);
  }
  
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type.  Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+                                         InstCombiner &IC) {
+  // We can only do this optimization if the output is a multiple of the input
+  // element size, or the input is a multiple of the output element size.
+  // Convert the input type to have the same element type as the output.
+  const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+  
+  if (SrcTy->getElementType() != DestTy->getElementType()) {
+    // The input types don't need to be identical, but for now they must be the
+    // same size.  There is no specific reason we couldn't handle things like
+    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+    // there yet. 
+    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+        DestTy->getElementType()->getPrimitiveSizeInBits())
+      return 0;
+    
+    SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+    InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+  }
+  
+  // Now that the element types match, get the shuffle mask and RHS of the
+  // shuffle to use, which depends on whether we're increasing or decreasing the
+  // size of the input.
+  SmallVector<Constant*, 16> ShuffleMask;
+  Value *V2;
+  const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+  
+  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+    // If we're shrinking the number of elements, just shuffle in the low
+    // elements from the input and use undef as the second shuffle input.
+    V2 = UndefValue::get(SrcTy);
+    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+    
+  } else {
+    // If we're increasing the number of elements, shuffle in all of the
+    // elements from InVal and fill the rest of the result elements with zeros
+    // from a constant zero.
+    V2 = Constant::getNullValue(SrcTy);
+    unsigned SrcElts = SrcTy->getNumElements();
+    for (unsigned i = 0, e = SrcElts; i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+    // The excess elements reference the first element of the zero input.
+    ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+                       ConstantInt::get(Int32Ty, SrcElts));
+  }
+  
+  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+  return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+
  Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
    // If the operands are integer typed then apply the integer transforms,
    // otherwise just apply the common ones.
@@ -1170,7 +1373,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
        Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
      unsigned NumZeros = 0;
      while (SrcElTy != DstElTy && 
-           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
             SrcElTy->getNumContainedTypes() /* not "{}" */) {
        SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
        ++NumZeros;
@@ -1185,16 +1388,28 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
    }
  
    if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
-    if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+    if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
                       Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
      }
+    
+    // If this is a cast from an integer to vector, check to see if the input
+    // is a trunc or zext of a bitcast from vector.  If so, we can replace all
+    // the casts with a shuffle and (potentially) a bitcast.
+    if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+      CastInst *SrcCast = cast<CastInst>(Src);
+      if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+        if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+                                               cast<VectorType>(DestTy), *this))
+            return I;
+    }
    }
  
    if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
-    if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+    if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
        Value *Elem = 
          Builder->CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
@@ -1204,8 +1419,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
  
    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
      // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
-    // a bitconvert to a vector with the same # elts.
-    if (SVI->hasOneUse() && isa<VectorType>(DestTy) && 
+    // a bitcast to a vector with the same # elts.
+    if (SVI->hasOneUse() && DestTy->isVectorTy() && 
          cast<VectorType>(DestTy)->getNumElements() ==
                SVI->getType()->getNumElements() &&
          SVI->getType()->getNumElements() ==
@@ -1227,7 +1442,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      }
    }
    
-  if (isa<PointerType>(SrcTy))
+  if (SrcTy->isPointerTy())
      return commonPointerCastTransforms(CI);
    return commonCastTransforms(CI);
  }