/// X*Scale+Offset.
///
static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- int &Offset) {
- assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!");
+ uint64_t &Offset) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
Scale = 0;
- return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
+ return ConstantInt::get(Val->getType(), 0);
}
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
if (I->getOpcode() == Instruction::Shl) {
// This is a value scaled by '1 << the shift amt'.
- Scale = 1U << RHS->getZExtValue();
+ Scale = UINT64_C(1) << RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
- int ArrayOffset;
+ uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
if (Scale == 1) {
Amt = NumElements;
} else {
- Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale);
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
}
- if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()),
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
}
return Instruction::CastOps(Res);
}
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast (of kind opc) from "V to Ty"
+/// actually results in any code being generated and is interesting to optimize
+/// out. If the cast can be eliminated by some other simple transformation, we
+/// prefer to do the simplification first, so false is returned in that case.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
- // If this is another cast that can be eliminated, it isn't codegen either.
+ // If V is itself a cast that forms an eliminable pair with this one, we
+ // prefer to have that pair eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opcode, Ty, TD))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
return true;
}
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!isa<IntegerType>(Src->getType()) ||
- !isa<IntegerType>(CI.getType()) ||
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
+/// This function works on both vectors and scalars.
+///
static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
const Type *OrigTy = V->getType();
- // If this is an extension from the dest type, we can eliminate it.
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
I->getOperand(0)->getType() == Ty)
return true;
case Instruction::Trunc:
// trunc(trunc(x)) -> trunc(x)
return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid cast: " << CI);
+ " to avoid cast: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
return ReplaceInstUsesWith(CI, Res);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ Src->hasOneUse()) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
return 0;
}
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
}
}
}
}
/// CanEvaluateZExtd - Determine if the specified value can be computed in the
-/// specified wider type and produce the same low bits. If not, return -1. If
-/// it is possible, return the number of high bits that are known to be zero in
-/// the promoted value.
-static bool CanEvaluateZExtd(Value *V, const Type *Ty, const TargetData *TD) {
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
if (isa<Constant>(V))
return true;
if (!I) return false;
// If the input is a truncate from the destination type, we can trivially
- // eliminate it.
- if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ // eliminate it, even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
- unsigned Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode(), Tmp;
switch (Opc) {
case Instruction::ZExt: // zext(zext(x)) -> zext(x).
case Instruction::SExt: // zext(sext(x)) -> sext(x).
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- return CanEvaluateZExtd(I->getOperand(0), Ty, TD) &&
- CanEvaluateZExtd(I->getOperand(1), Ty, TD);
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
- //case Instruction::LShr:
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+ case Instruction::LShr:
+ // We can promote lshr(x, cst) if we can promote x. This requires the
+ // ultimate 'and' to clear out the high zero bits we're clearing out though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
case Instruction::Select:
- return CanEvaluateZExtd(I->getOperand(1), Ty, TD) &&
- CanEvaluateZExtd(I->getOperand(2), Ty, TD);
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, TD)) return false;
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, TD)) return false;
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
return true;
}
default:
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, TD)) {
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid zero extend: " << CI);
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
// If the high bits are already filled with zeros, just replace this
// cast with the result.
- uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
- uint32_t DestBitSize = DestTy->getScalarSizeInBits();
if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
- DestBitSize-SrcBitSize)))
+ DestBitSize-SrcBitsKept)))
return ReplaceInstUsesWith(CI, Res);
// We need to emit an AND to clear the high bits.
Constant *C = ConstantInt::get(Res->getType(),
- APInt::getLowBitsSet(DestBitSize, SrcBitSize));
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
return BinaryOperator::CreateAnd(Res, C);
}
// zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
Value *X;
- if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) &&
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
match(SrcI, m_Not(m_Value(X))) &&
(!X->hasOneUse() || !isa<CmpInst>(X))) {
Value *New = Builder->CreateZExt(X, CI.getType());
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateSExtd(Value *V, const Type *Ty, TargetData *TD) {
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
// If this is a constant, it can be trivially promoted.
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- // If this is a truncate from the dest type, we can trivially eliminate it.
- if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ // If this is a truncate from the dest type, we can trivially eliminate it,
+ // even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
// We can't extend or shrink something that has multiple uses: doing so would
case Instruction::Sub:
case Instruction::Mul:
// These operators can all arbitrarily be extended if their inputs can.
- return CanEvaluateSExtd(I->getOperand(0), Ty, TD) &&
- CanEvaluateSExtd(I->getOperand(1), Ty, TD);
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
- //case Instruction::Trunc: TODO
case Instruction::Select:
- return CanEvaluateSExtd(I->getOperand(1), Ty, TD) &&
- CanEvaluateSExtd(I->getOperand(2), Ty, TD);
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty, TD)) return false;
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
return true;
}
default:
Value *Src = CI.getOperand(0);
const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
- // Canonicalize sign-extend from i1 to a select.
- if (Src->getType()->isInteger(1))
- return SelectInst::Create(Src,
- Constant::getAllOnesValue(CI.getType()),
- Constant::getNullValue(CI.getType()));
-
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateSExtd(Src, DestTy, TD)) {
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid sign extend: " << CI);
ShAmt);
}
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed
+ // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed
+ {
+ ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
+ if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
+ // sext (x <s 0) to i32 --> x>>s31 true if signbit set.
+ // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear.
+ if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
+ Value *Sh = ConstantInt::get(CmpLHS->getType(),
+ CmpLHS->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+ }
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
+ if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
+ ICmpInst::Predicate Pred; Value *CmpLHS;
+ if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector
+ Constant *Sh = ConstantInt::get(EltTy, EltTy->getScalarSizeInBits()-1);
+ std::vector<Constant *> Elts(VTy->getNumElements(), Sh);
+ Constant *VSh = ConstantVector::get(Elts);
+
+ Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+ }
+
// If the input is a shl/ashr pair of a same constant, then this is a sign
// extension from a smaller value. If we could trust arbitrary bitwidth
// integers, we could turn this into a truncate to the smaller bit and then
break;
}
}
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
return 0;
}
}
Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
- // If the source integer type is larger than the intptr_t type for
- // this target, do a trunc to the intptr_t type, then inttoptr of it. This
- // allows the trunc to be exposed to other transforms. Don't do this for
- // extending inttoptr's, because we don't know if the target sign or zero
- // extends to pointers.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
- return new IntToPtrInst(P, CI.getType());
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
+ if (TD) {
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateZExt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
}
if (Instruction *I = commonCastTransforms(CI))
}
Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
- // If the destination integer type is smaller than the intptr_t type for
- // this target, do a ptrtoint to intptr_t then do a trunc. This allows the
- // trunc to be exposed to other transforms. Don't do this for extending
- // ptrtoint's, because we don't know if the target sign or zero extends its
- // pointers.
- if (TD &&
- CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
- return new TruncInst(P, CI.getType());
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then a trunc (narrower dest) or a zext (wider
+ // dest). This allows the cast to be exposed to other transforms.
+ if (TD) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new TruncInst(P, CI.getType());
+ }
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new ZExtInst(P, CI.getType());
+ }
}
return commonPointerCastTransforms(CI);
}
+/// OptimizeVectorResize - InVal (which must have vector type; it is cast<> to
+/// VectorType below) is being zero extended or truncated to DestTy. Try to
+/// replace it with a shuffle (plus a vector bitcast created via IC.Builder).
+/// Returns the new, uninserted shufflevector, or null if the element types
+/// differ in bit size. The element types themselves are allowed to differ.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // Only element types of equal bit width are handled (checked below). If the
+ // element types differ, the input is first bitcast to a vector with the
+ // destination's element type and the source's element count.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing (or keeping) the number of elements, shuffle in all
+ // of the elements from InVal and fill the rest of the result elements with
+ // zeros taken from an all-zero second shuffle input.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements all use mask index SrcElts: element 0 of the zero V2.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+ return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0; // Both sizes are in bits.
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits(); // Bit count -> Ty-sized units.
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector (see the OptimizeIntegerToVectorInsertions
+/// comment for an example). V's size is a non-zero multiple of VecEltTy's.
+/// ElementIndex is the slot that the low VecEltTy-sized piece of V maps to.
+/// NOTE(review): no endianness check is visible here - confirm for big-endian.
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements (null entries mean "slot not yet set") as it goes.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win, try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if the slot is out of range or something was already inserted there.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces (piece i is bits [i*ElementSize, (i+1)*ElementSize)) and insert each.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false; // Only look through single-use values.
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements (returning the
+/// resulting vector, or null if the pattern isn't matched). For example, turn:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the elements are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions into an all-zero vector.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - Try to turn an integer->float/double bitcast
+/// into an extractelement (null if not). Long double casts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector))) -> extractelement 0
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))) -> extractelt cst/DestWidth
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
+
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
- isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
++NumZeros;
((Instruction*)NULL));
}
}
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
- if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
+ }
}
if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
Value *Elem =
Builder->CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
- // a bitconvert to a vector with the same # elts.
- if (SVI->hasOneUse() && isa<VectorType>(DestTy) &&
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
}
}
- if (isa<PointerType>(SrcTy))
+ if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}