: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
// Worklist of all of the instructions that need to be simplified.
- std::vector<Instruction*> Worklist;
+ SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
TargetData *TD;
bool MustPreserveLCSSA;
public:
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass((intptr_t)&ID) {}
+ InstCombiner() : FunctionPass(&ID) {}
/// AddToWorkList - Add the specified instruction to the worklist if it
/// isn't already in it.
Instruction *visitURem(BinaryOperator &I);
Instruction *visitSRem(BinaryOperator &I);
Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
Instruction *commonRemTransforms(BinaryOperator &I);
Instruction *commonIRemTransforms(BinaryOperator &I);
Instruction *commonDivTransforms(BinaryOperator &I);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
- Instruction *visitSelectInst(SelectInst &CI);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
Instruction *visitPHINode(PHINode &PN);
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue();
if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ return UpdateValueUsesWith(I, I->getOperand(0));
+
APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
if (SimplifyDemandedBits(I->getOperand(0), Mask2,
if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
LHSKnownZero |= ~LowBits;
- else if (LHSKnownOne[BitWidth-1])
- LHSKnownOne |= ~LowBits;
KnownZero |= LHSKnownZero & DemandedMask;
- KnownOne |= LHSKnownOne & DemandedMask;
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
}
}
break;
case Instruction::URem: {
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & DemandedMask;
- KnownZero |= ~LowBits & DemandedMask;
- if (SimplifyDemandedBits(I->getOperand(0), Mask2,
- KnownZero, KnownOne, Depth+1))
- return true;
-
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- break;
- }
- }
-
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
if (SimplifyDemandedBits(I->getOperand(0), AllOnes,
unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
assert(VWidth <= 64 && "Vector too wide to analyze!");
uint64_t EltMask = ~0ULL >> (64-VWidth);
- assert(DemandedElts != EltMask && (DemandedElts & ~EltMask) == 0 &&
- "Invalid DemandedElts!");
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
if (isa<UndefValue>(V)) {
// If the entire vector is undefined, just return this info.
return ConstantVector::get(Elts);
}
- if (!V->hasOneUse()) { // Other users may use these bits.
- if (Depth != 0) { // Not at the root.
+ // Limit search depth.
+ if (Depth == 10)
+ return false;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
// TODO: Just compute the UndefElts information recursively.
return false;
- }
- return false;
- } else if (Depth == 10) { // Limit search depth.
- return false;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
}
Instruction *I = dyn_cast<Instruction>(V);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
- UndefElts |= 1ULL << IdxNo;
+ UndefElts &= ~(1ULL << IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LeftDemanded = 0, RightDemanded = 0;
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts & (1ULL << i)) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < VWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < VWidth)
+ LeftDemanded |= 1ULL << MaskVal;
+ else
+ RightDemanded |= 1ULL << (MaskVal - VWidth);
+ }
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ uint64_t UndefElts3;
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ uint64_t NewBit = 1ULL << i;
+ UndefElts |= NewBit;
+ } else if (MaskVal < VWidth) {
+ uint64_t NewBit = ((UndefElts2 >> MaskVal) & 1) << i;
+ NewUndefElts |= NewBit;
+ UndefElts |= NewBit;
+ } else {
+ uint64_t NewBit = ((UndefElts3 >> (MaskVal - VWidth)) & 1) << i;
+ NewUndefElts |= NewBit;
+ UndefElts |= NewBit;
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts & (1ULL << i))
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ else
+ Elts.push_back(ConstantInt::get(Type::Int32Ty,
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
break;
}
case Instruction::BitCast: {
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Op0 == Op1) // sub X, X -> 0
+ if (Op0 == Op1 && // sub X, X -> 0
+ !I.getType()->isFPOrFPVector())
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// If this is a 'B = x-(-A)', change to B = x+A...
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
- // We need a better interface for long double here.
- if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1->getType())) {
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1))
+ if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
return Changed ? &I : 0;
}
+ /// SimplifyDivRemOfSelect - Fold a divide or remainder whose second operand
+ /// is a select with a null-constant arm.  The div/rem is rewritten to use the
+ /// other arm directly, and that knowledge is pushed into earlier uses of the
+ /// select and of its condition in the same block.  Returns true if I was
+ /// changed, false if neither arm of the select is a null constant.
+ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+   // Callers are expected to have checked that operand 1 is a select.
+   SelectInst *Sel = cast<SelectInst>(I.getOperand(1));
+
+   // Figure out which select operand survives.  Operand 0 is the condition, so
+   // 0 doubles as the "nothing found" marker.
+   //   div/rem X, (Cond ? 0 : Y) -> div/rem X, Y   (operand 2 survives)
+   //   div/rem X, (Cond ? Y : 0) -> div/rem X, Y   (operand 1 survives)
+   // The second test runs last, so it wins if both arms are null.
+   unsigned LiveArm = 0;
+   if (Constant *Arm1 = dyn_cast<Constant>(Sel->getOperand(1)))
+     if (Arm1->isNullValue())
+       LiveArm = 2;
+   if (Constant *Arm2 = dyn_cast<Constant>(Sel->getOperand(2)))
+     if (Arm2->isNullValue())
+       LiveArm = 1;
+   if (LiveArm == 0)
+     return false;
+
+   Value *Cond = Sel->getOperand(0);
+   Value *LiveVal = Sel->getOperand(LiveArm);
+
+   // Make the div/rem use 'Y' instead of the select.
+   I.setOperand(1, LiveVal);
+
+   // Replacing the operand above is always fine.  The select and its condition
+   // may have further users, though; since the divisor must be non-zero, the
+   // known value of the select and the known value of the condition can be
+   // propagated into those users as well.
+
+   // I no longer uses the select, so an empty use list means I was its only
+   // user.  If the condition has a single use too, there is nothing to
+   // propagate.
+   if (Sel->use_empty() && Cond->hasOneUse())
+     return true;
+
+   // Walk backward from I to the start of its block.  Sel and Cond are nulled
+   // out once their defining instruction has been passed; the walk ends as
+   // soon as both are null.
+   BasicBlock::iterator Scan = &I;
+   BasicBlock::iterator BlockBegin = I.getParent()->begin();
+   while ((Sel != 0 || Cond != 0) && Scan != BlockBegin) {
+     --Scan;
+
+     // A call to a function might not return, so facts established below it
+     // cannot be propagated above it.
+     if (isa<CallInst>(Scan) && !isa<IntrinsicInst>(Scan))
+       break;
+
+     // Substitute the known values into this instruction's operands.
+     for (Instruction::op_iterator OI = Scan->op_begin(), OE = Scan->op_end();
+          OI != OE; ++OI) {
+       if (*OI == Sel) {
+         *OI = LiveVal;
+         AddToWorkList(Scan);
+       } else if (*OI == Cond) {
+         if (LiveArm == 1)
+           *OI = ConstantInt::getTrue();
+         else
+           *OI = ConstantInt::getFalse();
+         AddToWorkList(Scan);
+       }
+     }
+
+     // Once we have passed the defining instruction, stop looking for it.
+     if (&*Scan == Sel)
+       Sel = 0;
+     if (&*Scan == Cond)
+       Cond = 0;
+   }
+   return true;
+ }
+
+
/// This function implements the transforms on div instructions that work
/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
/// used by the visitors to those instructions.
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1);
- // Handle cases involving: [su]div X, (select Cond, Y, Z)
- // This does not apply for fdiv.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- // [su]div X, (Cond ? 0 : Y) -> div X, Y. If the div and the select are in
- // the same basic block, then we replace the select with Y, and the
- // condition of the select with false (if the cond value is in the same BB).
- // If the select has uses other than the div, this allows them to be
- // simplified also. Note that div X, Y is just as good as div X, 0 (undef)
- if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getFalse());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(2));
- else
- UpdateValueUsesWith(SI, SI->getOperand(2));
- return &I;
- }
-
- // Likewise for: [su]div X, (Cond ? Y : 0) -> div X, Y
- if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(2)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getTrue());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(1));
- else
- UpdateValueUsesWith(SI, SI->getOperand(1));
- return &I;
- }
- }
-
return 0;
}
if (Instruction *Common = commonDivTransforms(I))
return Common;
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// div X, 1 == X
return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
// Handle cases involving: rem X, (select Cond, Y, Z)
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- // rem X, (Cond ? 0 : Y) -> rem X, Y. If the rem and the select are in
- // the same basic block, then we replace the select with Y, and the
- // condition of the select with false (if the cond value is in the same
- // BB). If the select has uses other than the div, this allows them to be
- // simplified also.
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getFalse());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(2));
- else
- UpdateValueUsesWith(SI, SI->getOperand(2));
- return &I;
- }
- // Likewise for: rem X, (Cond ? Y : 0) -> rem X, Y
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getTrue());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(1));
- else
- UpdateValueUsesWith(SI, SI->getOperand(1));
- return &I;
- }
- }
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
return 0;
}
return common;
if (Value *RHSNeg = dyn_castNegVal(Op1))
- if (!isa<ConstantInt>(RHSNeg) ||
- cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive()) {
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
// X % -Y -> X % Y
AddUsesToWorkList(I);
I.setOperand(1, RHSNeg);
}
}
-
- { // (icmp ugt/ult A, C) & (icmp B, C) --> (icmp (A|B), C)
+ { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
// where C is a power of 2
Value *A, *B;
ConstantInt *C1, *C2;
- ICmpInst::Predicate LHSCC, RHSCC;
+ ICmpInst::Predicate LHSCC = ICmpInst::BAD_ICMP_PREDICATE;
+ ICmpInst::Predicate RHSCC = ICmpInst::BAD_ICMP_PREDICATE;
if (match(&I, m_And(m_ICmp(LHSCC, m_Value(A), m_ConstantInt(C1)),
m_ICmp(RHSCC, m_Value(B), m_ConstantInt(C2)))))
- if (C1 == C2 && LHSCC == RHSCC && C1->getValue().isPowerOf2() &&
- (LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_UGT)) {
+ if (C1 == C2 && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
+ C1->getValue().isPowerOf2()) {
Instruction *NewOr = BinaryOperator::CreateOr(A, B);
InsertNewInstBefore(NewOr, I);
return new ICmpInst(LHSCC, NewOr, C1);
}
}
-
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
return Result->getValue().ult(In1->getValue());
}
+ /// SubWithOverflow - Store In1-In2 into Result and report whether the
+ /// subtraction overflowed for this type.  IsSigned selects signed rather than
+ /// unsigned overflow detection.
+ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
+                             ConstantInt *In2, bool IsSigned = false) {
+   Result = cast<ConstantInt>(Subtract(In1, In2));
+   const APInt &Diff = Result->getValue();
+   const APInt &Minuend = In1->getValue();
+
+   // Unsigned: the difference may never exceed the value subtracted from.
+   if (!IsSigned)
+     return Diff.ugt(Minuend);
+
+   // Signed: taking away a negative value must not make the result smaller...
+   if (In2->getValue().isNegative())
+     return Diff.slt(Minuend);
+
+   // ...and taking away a non-negative value must not make it larger.
+   return Diff.sgt(Minuend);
+ }
+
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
}
}
- // See if we are doing a comparison between a constant and an instruction that
- // can be folded into the comparison.
+ // See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
Value *A, *B;
return new ICmpInst(I.getPredicate(), A, B);
}
- // If we have a icmp le or icmp ge instruction, turn it into the appropriate
- // icmp lt or icmp gt instruction. This allows us to rely on them being
- // folded in the code below.
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below.
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULE:
return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
break;
}
-
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
// Since the RHS is a ConstantInt (CI), if the left hand side is an
// instruction, see if that instruction also has constants so that the
// instruction can be folded into the icmp
if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1) &&
I.isEquality()) {
- switch (Op0I->getOpcode()) {
+ switch (Op0I->getOpcode()) {
default: break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
// a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
if (!CI->isZero() && !CI->isOne()) {
const APInt &AP = CI->getValue();
ConstantInt *Mask = ConstantInt::get(
// e.g. X/-5 op -3 --> [15, 20)
LoBound = Prod;
LoOverflow = HiOverflow = ProdOV ? 1 : 0;
- HiBound = Subtract(Prod, DivRHS);
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
}
// Dividing by a negative swaps the condition. LT <-> GT
return ReplaceInstUsesWith(I, CSI);
// See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0,
+ if (!isa<VectorType>(I.getType()) &&
+ MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
return new TruncInst(Op, CI.getType(), "tmp");
}
}
-
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && isa<TruncInst>(A)) {
+ Value *I = cast<TruncInst>(A)->getOperand(0);
+ if (I->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
+ CI.getName()), CI);
+ return BinaryOperator::CreateAShr(I, ShAmtV);
+ }
+ }
+
return 0;
}
}
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
- // fptoui(uitofp(X)) --> X if the intermediate type has enough bits in its
- // mantissa to accurately represent all values of X. For example, do not
- // do this with i64->float->i64.
- if (UIToFPInst *SrcI = dyn_cast<UIToFPInst>(FI.getOperand(0)))
- if (SrcI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
- SrcI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // Fold fptoui(uitofp(X)) --> X and fptoui(sitofp(X)) --> X, provided X
+ // already has the type this cast produces.
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() < /*strict '<': extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
return commonCastTransforms(FI);
}
Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
- // fptosi(sitofp(X)) --> X if the intermediate type has enough bits in its
- // mantissa to accurately represent all values of X. For example, do not
- // do this with i64->float->i64.
- if (SIToFPInst *SrcI = dyn_cast<SIToFPInst>(FI.getOperand(0)))
- if (SrcI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
- SrcI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // Fold fptosi(sitofp(X)) --> X and fptosi(uitofp(X)) --> X, provided X
+ // already has the type this cast produces.
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the uitofp case: an 'X' with its top
+ // bit set converts to a value too large for fptosi, whose result is undefined.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
return commonCastTransforms(FI);
}
return 0;
}
+ /// visitSelectInstWithICmp - Simplify the select SI, whose condition is the
+ /// integer comparison ICI.  Returns the result of ReplaceInstUsesWith when SI
+ /// folds to one of its arms, &SI when SI/ICI were rewritten in place, and null
+ /// when nothing changed.
+ ///
+ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+                                                    ICmpInst *ICI) {
+   ICmpInst::Predicate Pred = ICI->getPredicate();
+   Value *CondLHS = ICI->getOperand(0);
+   Value *CondRHS = ICI->getOperand(1);
+   Value *IfTrue = SI.getTrueValue();
+   Value *IfFalse = SI.getFalseValue();
+   bool Changed = false;
+
+   // Handle comparisons against a constant that can be nudged by one so that
+   // the select fits the min/max idiom.  ICI may be edited in place below, so
+   // only look at the constant when the select is the compare's sole user.
+   const bool IsLess =
+     Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT;
+   const bool IsGreater =
+     Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT;
+   ConstantInt *RHSC = ICI->hasOneUse() ? dyn_cast<ConstantInt>(CondRHS) : 0;
+
+   if (RHSC && (IsLess || IsGreater)) {
+     const bool IsSigned =
+       Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
+
+     // X < MIN ? T : F --> F
+     // X > MAX ? T : F --> F
+     const bool NeverTrue = IsLess ? RHSC->isMinValue(IsSigned)
+                                   : RHSC->isMaxValue(IsSigned);
+     if (NeverTrue)
+       return ReplaceInstUsesWith(SI, IfFalse);
+
+     // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+     // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+     Constant *AdjustedRHS = 0;
+     if (IsLess)
+       AdjustedRHS = SubOne(RHSC);
+     else
+       AdjustedRHS = AddOne(RHSC);
+
+     const bool ArmsAreLHSAndAdjusted =
+       (CondLHS == IfTrue && AdjustedRHS == IfFalse) ||
+       (CondLHS == IfFalse && AdjustedRHS == IfTrue);
+     if (ArmsAreLHSAndAdjusted) {
+       // Flip the predicate, compare against the adjusted constant and swap
+       // the arms of the select to match.
+       Pred = ICmpInst::getSwappedPredicate(Pred);
+       CondRHS = AdjustedRHS;
+       std::swap(IfFalse, IfTrue);
+       ICI->setPredicate(Pred);
+       ICI->setOperand(1, CondRHS);
+       SI.setOperand(1, IfTrue);
+       SI.setOperand(2, IfFalse);
+       Changed = true;
+     }
+   }
+
+   // The select picks between the two compared values, in either order:
+   //   (X == Y) ? X : Y -> Y        (X == Y) ? Y : X -> X
+   //   (X != Y) ? X : Y -> X        (X != Y) ? Y : X -> Y
+   // In every case equality yields the false arm and inequality the true arm.
+   const bool ArmsAreCmpOperands =
+     (CondLHS == IfTrue && CondRHS == IfFalse) ||
+     (CondLHS == IfFalse && CondRHS == IfTrue);
+   if (ArmsAreCmpOperands) {
+     if (Pred == ICmpInst::ICMP_EQ)
+       return ReplaceInstUsesWith(SI, IfFalse);
+     if (Pred == ICmpInst::ICMP_NE)
+       return ReplaceInstUsesWith(SI, IfTrue);
+     // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+   }
+
+   // NOTE: if we wanted to, this is where to detect integer ABS
+
+   return Changed ? &SI : 0;
+ }
+
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
// Transform (X != Y) ? X : Y -> X
if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX/ABS/etc.
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
// Transform (X == Y) ? Y : X -> X
// Transform (X != Y) ? Y : X -> Y
if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX/ABS/etc.
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
}
+ // NOTE: if we wanted to, this is where to detect ABS
}
// See if we are selecting two values based on a comparison of the two values.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) {
- if (ICI->getOperand(0) == TrueVal && ICI->getOperand(1) == FalseVal) {
- // Transform (X == Y) ? X : Y -> Y
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(SI, FalseVal);
- // Transform (X != Y) ? X : Y -> X
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX/ABS/etc.
-
- } else if (ICI->getOperand(0) == FalseVal && ICI->getOperand(1) == TrueVal){
- // Transform (X == Y) ? Y : X -> X
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(SI, FalseVal);
- // Transform (X != Y) ? Y : X -> Y
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX/ABS/etc.
- }
- }
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
const Type *OpPtrTy =
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+ Attributes RAttrs = CallerPAL.getParamAttrs(0);
if (RAttrs & ParamAttr::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
break;
- ParameterAttributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
if (PAttrs & ParamAttr::VarArgsIncompatible)
return false;
}
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+ Attributes RAttrs = CallerPAL.getParamAttrs(0);
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
}
// Add any parameter attributes.
- if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+ if (Attributes PAttrs = CallerPAL.getParamAttrs(i + 1))
attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
}
}
// Add any parameter attributes.
- if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+ if (Attributes PAttrs = CallerPAL.getParamAttrs(i + 1))
attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
}
}
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
const Type *NestTy = 0;
- ParameterAttributes NestAttr = ParamAttr::None;
+ Attributes NestAttr = ParamAttr::None;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
// mean appending it. Likewise for attributes.
// Add any function result attributes.
- if (ParameterAttributes Attr = Attrs.getParamAttrs(0))
+ if (Attributes Attr = Attrs.getParamAttrs(0))
NewAttrs.push_back(ParamAttrsWithIndex::get(0, Attr));
{
// Add the original argument and attributes.
NewArgs.push_back(*I);
- if (ParameterAttributes Attr = Attrs.getParamAttrs(Idx))
+ if (Attributes Attr = Attrs.getParamAttrs(Idx))
NewAttrs.push_back
(ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
}
}
// If we are using a wider index than needed for this platform, shrink it
- // to what we need. If the incoming value needs a cast instruction,
+ // to what we need. If narrower, sign-extend it to what we need.
+ // If the incoming value needs a cast instruction,
// insert it. This explicit cast can make subsequent optimizations more
// obvious.
Value *Op = *i;
*i = Op;
MadeChange = true;
}
+ } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) {
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ *i = ConstantExpr::getSExt(C, TD->getIntPtrType());
+ MadeChange = true;
+ } else {
+ Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
+ GEP);
+ *i = Op;
+ MadeChange = true;
+ }
}
}
}
}
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
- // See if we are trying to extract a known value. If so, use that instead.
- if (Value *Elt = FindInsertedValue(EV.getOperand(0), EV.idx_begin(),
- EV.idx_end(), &EV))
- return ReplaceInstUsesWith(EV, Elt);
+ Value *Agg = EV.getAggregateOperand();
- // No changes
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element selected by the first index out of the constant.
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Apply the remaining indices to the element selected by the first
+ // index.
+ return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction; compare the indices.
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = InsertNewInstBefore(
+ ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end()),
+ EV);
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list (the only case left).
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitly by the above (extract ( extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate).
return 0;
}
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
- Mask[InsertedIdx & (NumElts-1)] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::Int32Ty, ExtractedIdx);
} else {
assert(EI->getOperand(0) == RHS);
- Mask[InsertedIdx & (NumElts-1)] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::Int32Ty, ExtractedIdx+NumElts);
}
if (EI->getOperand(0) == RHS || RHS == 0) {
RHS = EI->getOperand(0);
Value *V = CollectShuffleElements(VecOp, Mask, RHS);
- Mask[InsertedIdx & (NumElts-1)] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::Int32Ty, NumElts+ExtractedIdx);
return V;
}
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(SVI.getOperand(2)))
return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
- // If we have shuffle(x, undef, mask) and any elements of mask refer to
- // the undef, change them to undefs.
- if (isa<UndefValue>(SVI.getOperand(1))) {
- // Scan to see if there are any references to the RHS. If so, replace them
- // with undef element refs and set MadeChange to true.
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= e && Mask[i] != 2*e) {
- Mask[i] = 2*e;
- MadeChange = true;
- }
- }
-
- if (MadeChange) {
- // Remap any references to RHS to use LHS.
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] == 2*e)
- Elts.push_back(UndefValue::get(Type::Int32Ty));
- else
- Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
- }
- SVI.setOperand(2, ConstantVector::get(Elts));
- }
+
+ uint64_t UndefElts;
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+ uint64_t AllOnesEltMask = ~0ULL >> (64-VWidth);
+ if (VWidth <= 64 &&
+ SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
}
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
Elts.push_back(UndefValue::get(Type::Int32Ty));
else {
if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
- (Mask[i] < e && isa<UndefValue>(LHS)))
+ (Mask[i] < e && isa<UndefValue>(LHS))) {
Mask[i] = 2*e; // Turn into undef.
- else
- Mask[i] &= (e-1); // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
+ }
}
}
SVI.setOperand(0, SVI.getOperand(1));
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
const TargetData *TD) {
- std::vector<BasicBlock*> Worklist;
+ SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
while (!Worklist.empty()) {
}
// See if we can trivially sink this instruction to a successor basic block.
- // FIXME: Remove GetResultInst test when first class support for aggregates
- // is implemented.
- if (I->hasOneUse() && !isa<GetResultInst>(I)) {
+ if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
if (UserParent != BB) {
return new InstCombiner();
}
+