X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstructionCombining.cpp;h=e99000125cc4d1059047596f3e90ab910eec47fb;hb=c6334b97e1de7c7c67c7279bdc44eb99ea65c78c;hp=13f0ac6f7c97ba3d0da448fbc5c6c2673812fc66;hpb=ac8f2fd2e217d063d7ef57b8698894739f707431;p=oota-llvm.git diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 13f0ac6f7c9..e99000125cc 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -38,27 +38,21 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" -#include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/TargetFolder.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -70,7 +64,6 @@ using namespace llvm::PatternMatch; STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); @@ -78,20 +71,12 @@ char InstCombiner::ID = 0; static RegisterPass X("instcombine", "Combine redundant instructions"); -// getComplexity: Assign a complexity or rank value to LLVM Values... -// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst -static unsigned getComplexity(Value *V) { - if (isa(V)) { - if (BinaryOperator::isNeg(V) || - BinaryOperator::isFNeg(V) || - BinaryOperator::isNot(V)) - return 3; - return 4; - } - if (isa(V)) return 3; - return isa(V) ? (isa(V) ? 0 : 1) : 2; +void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); } + // isOnlyUse - Return true if this instruction will be deleted if we stop using // it. static bool isOnlyUse(Value *V) { @@ -111,8 +96,7 @@ static const Type *getPromotedType(const Type *Ty) { /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. -static bool ShouldChangeType(const Type *From, const Type *To, - const TargetData *TD) { +bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { assert(isa(From) && isa(To)); // If we don't have TD, we don't know if the source/dest are legal. @@ -150,51 +134,7 @@ static Value *getBitCastOperand(Value *V) { return 0; } -/// This function is a wrapper around CastInst::isEliminableCastPair. It -/// simply extracts arguments and returns what that function returns. -static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction - TargetData *TD ///< The target data for pointer size -) { - - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above - - // Get the opcodes of the two Cast instructions - Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); - Instruction::CastOps secondOp = Instruction::CastOps(opcode); - - unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, - TD ? TD->getIntPtrType(CI->getContext()) : 0); - - // We don't want to form an inttoptr or ptrtoint that converts to an integer - // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || - (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) - Res = 0; - - return Instruction::CastOps(Res); -} -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, - const Type *Ty, TargetData *TD) { - if (V->getType() == Ty || isa(V)) return false; - - // If this is another cast that can be eliminated, it isn't codegen either. - if (const CastInst *CI = dyn_cast(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) - return false; - return true; -} // SimplifyCommutative - This performs a few simplifications for commutative // operators: @@ -245,7 +185,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // -static inline Value *dyn_castNegVal(Value *V) { +Value *InstCombiner::dyn_castNegVal(Value *V) const { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); @@ -279,57 +219,6 @@ static inline Value *dyn_castFNegVal(Value *V) { return 0; } -/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, -/// returning the kind and providing the out parameter results if we -/// successfully match. -static SelectPatternFlavor -MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { - SelectInst *SI = dyn_cast(V); - if (SI == 0) return SPF_UNKNOWN; - - ICmpInst *ICI = dyn_cast(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; - - LHS = ICI->getOperand(0); - RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y - if (SI->getTrueValue() == ICI->getOperand(0) && - SI->getFalseValue() == ICI->getOperand(1)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMAX; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMAX; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMIN; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMIN; - } - } - - // (icmp X, Y) ? Y : X - if (SI->getTrueValue() == ICI->getOperand(1) && - SI->getFalseValue() == ICI->getOperand(0)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; - } - } - - // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - - return SPF_UNKNOWN; -} - /// isFreeToInvert - Return true if the specified value is free to invert (apply /// ~ to). This happens in cases where the ~ can be eliminated. static inline bool isFreeToInvert(Value *V) { @@ -391,13 +280,11 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { /// AddOne - Add one to a ConstantInt static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, - ConstantInt::get(C->getType(), 1)); + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, - ConstantInt::get(C->getType(), 1)); + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. @@ -423,9404 +310,4060 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { } -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. -static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { - assert(I && "No instruction?"); - assert(OpNo < I->getNumOperands() && "Operand index too large"); - // If the operand is not a constant integer, nothing to do. - ConstantInt *OpC = dyn_cast(I->getOperand(OpNo)); - if (!OpC) return false; +/// AssociativeOpt - Perform an optimization on an associative operator. This +/// function is designed to check a chain of associative operators for a +/// potential to apply a certain optimization. Since the optimization may be +/// applicable if the expression was reassociated, this checks the chain, then +/// reassociates the expression as necessary to expose the optimization +/// opportunity. This makes use of a special Functor, which must define +/// 'shouldApply' and 'apply' methods. +/// +template +static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { + unsigned Opcode = Root.getOpcode(); + Value *LHS = Root.getOperand(0); - // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); - if ((~Demanded & OpC->getValue()) == 0) - return false; + // Quick check, see if the immediate LHS matches... + if (F.shouldApply(LHS)) + return F.apply(Root); - // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= OpC->getValue(); - I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); - return true; -} + // Otherwise, if the LHS is not of the same opcode as the root, return. + Instruction *LHSI = dyn_cast(LHS); + while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) { + // Should we apply this transform to the RHS? + bool ShouldApply = F.shouldApply(LHSI->getOperand(1)); + + // If not to the RHS, check to see if we should apply to the LHS... + if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) { + cast(LHSI)->swapOperands(); // Make the LHS the RHS + ShouldApply = true; + } + + // If the functor wants to apply the optimization to the RHS of LHSI, + // reassociate the expression from ((? op A) op B) to (? op (A op B)) + if (ShouldApply) { + // Now all of the instructions are in the current basic block, go ahead + // and perform the reassociation. + Instruction *TmpLHSI = cast(Root.getOperand(0)); + + // First move the selected RHS to the LHS of the root... + Root.setOperand(0, LHSI->getOperand(1)); + + // Make what used to be the LHS of the root be the user of the root... + Value *ExtraOperand = TmpLHSI->getOperand(1); + if (&Root == TmpLHSI) { + Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); + return 0; + } + Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI + TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root + BasicBlock::iterator ARI = &Root; ++ARI; + TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root + ARI = Root; + + // Now propagate the ExtraOperand down the chain of instructions until we + // get to LHSI. + while (TmpLHSI != LHSI) { + Instruction *NextLHSI = cast(TmpLHSI->getOperand(0)); + // Move the instruction to immediately before the chain we are + // constructing to avoid breaking dominance properties. + NextLHSI->moveBefore(ARI); + ARI = NextLHSI; + + Value *NextOp = NextLHSI->getOperand(1); + NextLHSI->setOperand(1, ExtraOperand); + TmpLHSI = NextLHSI; + ExtraOperand = NextOp; + } + + // Now that the instructions are reassociated, have the functor perform + // the transformation... + return F.apply(Root); + } -// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a -// set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, - const APInt& KnownOne, - APInt& Min, APInt& Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when all unknown bits are zeros, EXCEPT for the sign - // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; - - if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); + LHSI = dyn_cast(LHSI->getOperand(0)); } + return 0; } -// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and -// a set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, - APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when the unknown bits are all zeros. - Min = KnownOne; - // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; +namespace { + +// AddRHS - Implements: X + X --> X << 1 +struct AddRHS { + Value *RHS; + explicit AddRHS(Value *rhs) : RHS(rhs) {} + bool shouldApply(Value *LHS) const { return LHS == RHS; } + Instruction *apply(BinaryOperator &Add) const { + return BinaryOperator::CreateShl(Add.getOperand(0), + ConstantInt::get(Add.getType(), 1)); + } +}; + +// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2) +// iff C1&C2 == 0 +struct AddMaskingAnd { + Constant *C2; + explicit AddMaskingAnd(Constant *c) : C2(c) {} + bool shouldApply(Value *LHS) const { + ConstantInt *C1; + return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && + ConstantExpr::getAnd(C1, C2)->isNullValue(); + } + Instruction *apply(BinaryOperator &Add) const { + return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); + } +}; + } -/// SimplifyDemandedInstructionBits - Inst is an integer instruction that -/// SimplifyDemandedBits knows about. See if the instruction has any -/// properties that allow us to simplify its operands. -bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); +static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, + InstCombiner *IC) { + if (CastInst *CI = dyn_cast(&I)) + return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + + // Figure out if the constant is the left or the right argument. + bool ConstIsRHS = isa(I.getOperand(1)); + Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); + + if (Constant *SOC = dyn_cast(SO)) { + if (ConstIsRHS) + return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); + return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); + } + + Value *Op0 = SO, *Op1 = ConstOperand; + if (!ConstIsRHS) + std::swap(Op0, Op1); - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, - KnownZero, KnownOne, 0); - if (V == 0) return false; - if (V == &Inst) return true; - ReplaceInstUsesWith(Inst, V); - return true; + if (BinaryOperator *BO = dyn_cast(&I)) + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); + if (ICmpInst *CI = dyn_cast(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + if (FCmpInst *CI = dyn_cast(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + llvm_unreachable("Unknown binary instruction type!"); } -/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the -/// specified instruction operand if possible, updating it in place. It returns -/// true if it made any change and false otherwise. -bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) { - Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, - KnownZero, KnownOne, Depth); - if (NewVal == 0) return false; - U = NewVal; - return true; +// FoldOpIntoSelect - Given an instruction with a select as one operand and a +// constant as the other operand, try to fold the binary operator into the +// select arguments. This also works for Cast instructions, which obviously do +// not have a second operand. +Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { + // Don't modify shared select instructions + if (!SI->hasOneUse()) return 0; + Value *TV = SI->getOperand(1); + Value *FV = SI->getOperand(2); + + if (isa(TV) || isa(FV)) { + // Bool selects with constant operands can be folded to logical ops. + if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0; + + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); + Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); + + return SelectInst::Create(SI->getCondition(), SelectTrueVal, + SelectFalseVal); + } + return 0; } -/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler -/// value based on the demanded bits. When this function is called, it is known -/// that only the bits set in DemandedMask of the result of V are ever used -/// downstream. Consequently, depending on the mask and V, it may be possible -/// to replace V with a constant or one of its operands. In such cases, this -/// function does the replacement and returns true. In all other cases, it -/// returns false after analyzing the expression and setting KnownOne and known -/// to be one in the expression. KnownZero contains all the bits that are known -/// to be zero in the expression. These are provided to potentially allow the -/// caller (which might recursively be SimplifyDemandedBits itself) to simplify -/// the expression. KnownOne and KnownZero always follow the invariant that -/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that -/// the bits in KnownOne and KnownZero may only be accurate for those bits set -/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero -/// and KnownOne must all be the same. +/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which +/// has a PHI node as operand #0, see if we can fold the instruction into the +/// PHI (which is only possible if all operands to the PHI are constants). /// -/// This returns null if it did not change anything and it permits no -/// simplification. This returns V itself if it did some simplification of V's -/// operands based on the information about what bits are demanded. This returns -/// some other non-null value if it found out that V is equal to another value -/// in the context where the specified bits are demanded, but not for all users. -Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) { - assert(V != 0 && "Null pointer of Value???"); - assert(Depth <= 6 && "Limit Search Depth"); - uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); - assert((TD || !isa(VTy)) && - "SimplifyDemandedBits needs to know bit widths!"); - assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || - VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); - if (ConstantInt *CI = dyn_cast(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue() & DemandedMask; - KnownZero = ~KnownOne & DemandedMask; - return 0; - } - if (isa(V)) { - // We know all of the bits for a constant! - KnownOne.clear(); - KnownZero = DemandedMask; +/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms +/// that would normally be unprofitable because they strongly encourage jump +/// threading. +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, + bool AllowAggressive) { + AllowAggressive = false; + PHINode *PN = cast(I.getOperand(0)); + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (NumPHIValues == 0 || + // We normally only transform phis with a single use, unless we're trying + // hard to make jump threading happen. + (!PN->hasOneUse() && !AllowAggressive)) return 0; - } - - KnownZero.clear(); - KnownOne.clear(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa(V)) - return 0; - return UndefValue::get(VTy); - } - if (Depth == 6) // Limit search depth. - return 0; - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; - - Instruction *I = dyn_cast(V); - if (!I) { - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - return 0; // Only analyze instructions. - } - - // If there are multiple uses of this value and we aren't at the root, then - // we can't do any simplifications of the operands, because DemandedMask - // only reflects the bits demanded by *one* of the users. - if (Depth != 0 && !I->hasOneUse()) { - // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can - // do simplifications that apply to *just* the one user if we know that - // this instruction has a simpler value in that context. - if (I->getOpcode() == Instruction::And) { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and' in this - // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - } else if (I->getOpcode() == Instruction::Or) { - // We can simplify (X|Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known zero on one side, return the - // other. These bits cannot contribute to the result of the 'or' in this - // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); + // Check to see if all of the operands of the PHI are simple constants + // (constantint/constantfp/undef). If there is one non-constant value, + // remember the BB it is in. If there is more than one or if *it* is a PHI, + // bail out. We don't do arbitrary constant expressions here because moving + // their computation can be expensive without a cost model. + BasicBlock *NonConstBB = 0; + for (unsigned i = 0; i != NumPHIValues; ++i) + if (!isa(PN->getIncomingValue(i)) || + isa(PN->getIncomingValue(i))) { + if (NonConstBB) return 0; // More than one non-const value. + if (isa(PN->getIncomingValue(i))) return 0; // Itself a phi. + NonConstBB = PN->getIncomingBlock(i); - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); + // If the incoming non-constant value is in I's block, we have an infinite + // loop. + if (NonConstBB == I.getParent()) + return 0; } - - // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); - return 0; - } - - // If this is the root being simplified, allow it to have multiple uses, - // just set the DemandedMask to all bits so that we can try to simplify the - // operands. This allows visitTruncInst (for example) to simplify the - // operand of a trunc without duplicating all the logic below. - if (Depth == 0 && !V->hasOneUse()) - DemandedMask = APInt::getAllOnesValue(BitWidth); - switch (I->getOpcode()) { - default: - ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::And: - // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) - return I; - - // Output known-1 bits are only known if set in both the LHS & RHS. - RHSKnownOne &= LHSKnownOne; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - RHSKnownZero |= LHSKnownZero; - break; - case Instruction::Or: - // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // Output known-0 bits are only known if clear in both the LHS & RHS. - RHSKnownZero &= LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. - RHSKnownOne |= LHSKnownOne; - break; - case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) - return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) - return I->getOperand(1); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); - - // If all of the demanded bits are known to be zero on one side or the - // other, turn this into an *inclusive* or. - // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // If all of the demanded bits on one side are known, and all of the set - // bits on that side are also known to be set on the other side, turn this - // into an AND, as we know the bits will be cleared. - // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - return InsertNewInstBefore(And, *I); - } - } - - // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // If our LHS is an 'and' and if it has one use, and if any of the bits we - // are flipping are known to be set, then the xor is just resetting those - // bits to zero. We can just knock out bits from the 'and' and the 'xor', - // simplifying both of them. - if (Instruction *LHSInst = dyn_cast(I->getOperand(0))) - if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && - isa(I->getOperand(1)) && - isa(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { - ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); - ConstantInt *XorRHS = cast(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); - - Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - InsertNewInstBefore(NewAnd, *I); - - Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); - return InsertNewInstBefore(NewXor, *I); - } - - - RHSKnownZero = KnownZeroOut; - RHSKnownOne = KnownOneOut; - break; + // If there is exactly one non-constant value, we can insert a copy of the + // operation in that block. However, if this is a critical edge, we would be + // inserting the computation one some other paths (e.g. inside a loop). Only + // do this if the pred block is unconditionally branching into the phi block. + if (NonConstBB != 0 && !AllowAggressive) { + BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); + if (!BI || !BI->isUnconditional()) return 0; } - case Instruction::Select: - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(I, 1, DemandedMask) || - ShrinkDemandedConstant(I, 2, DemandedMask)) - return I; - - // Only known if known in both the LHS and RHS. - RHSKnownOne &= LHSKnownOne; - RHSKnownZero &= LHSKnownZero; - break; - case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - RHSKnownZero.zext(truncBf); - RHSKnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.trunc(BitWidth); - RHSKnownZero.trunc(BitWidth); - RHSKnownOne.trunc(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - } - case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) - return false; // vector->int or fp->int? - - if (const VectorType *DstVTy = dyn_cast(I->getType())) { - if (const VectorType *SrcVTy = - dyn_cast(I->getOperand(0)->getType())) { - if (DstVTy->getNumElements() != SrcVTy->getNumElements()) - // Don't touch a bitcast between vectors of different element counts. - return false; - } else - // Don't touch a scalar-to-vector bitcast. - return false; - } else if (isa(I->getOperand(0)->getType())) - // Don't touch a vector-to-scalar bitcast. - return false; - - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - break; - } - case Instruction::SExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); - - APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. - if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); - - InputDemandedBits.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - InputDemandedBits.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - - // If the input sign bit is known zero, or if the NewBits are not demanded - // convert this into a zero extension. - if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast - CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); - return InsertNewInstBefore(NewCast, *I); - } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set - RHSKnownOne |= NewBits; - } - break; - } - case Instruction::Add: { - // Figure out what the input bits are. If the top bits of the and result - // are not demanded, then the add doesn't demand them from its input - // either. - unsigned NLZ = DemandedMask.countLeadingZeros(); - - // If there is a constant on the RHS, there are a variety of xformations - // we can do. - if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { - // If null, this should be simplified elsewhere. Some of the xforms here - // won't work if the RHS is zero. - if (RHS->isZero()) - break; - - // If the top bit of the output is demanded, demand everything from the - // input. Otherwise, we demand all the input bits except NLZ top bits. - APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); - // Find information about known zero/one bits in the input. - if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; + // Okay, we can do the transformation: create the new PHI node. + PHINode *NewPN = PHINode::Create(I.getType(), ""); + NewPN->reserveOperandSpace(PN->getNumOperands()/2); + InsertNewInstBefore(NewPN, *PN); + NewPN->takeName(PN); - // If the RHS of the add has bits set that can't affect the input, reduce - // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits)) - return I; - - // Avoid excess work. - if (LHSKnownZero == 0 && LHSKnownOne == 0) - break; - - // Turn it into OR if input bits are zero. - if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // We can say something about the output known-zero and known-one bits, - // depending on potential carries from the input constant and the - // unknowns. For example if the LHS is known to have at most the 0x0F0F0 - // bits set and the RHS constant is 0x01001, then we know we have a known - // one mask of 0x00001 and a known zero mask of 0xE0F0E. - - // To compute this, we first compute the potential carry bits. These are - // the bits which may be modified. I'm not aware of a better way to do - // this scan. - const APInt &RHSVal = RHS->getValue(); - APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); - - // Now that we know which bits have carries, compute the known-1/0 sets. - - // Bits are known one if they are known zero in one operand and one in the - // other, and there is no input carry. - RHSKnownOne = ((LHSKnownZero & RHSVal) | - (LHSKnownOne & ~RHSVal)) & ~CarryBits; - - // Bits are known zero if they are known zero in both operands and there - // is no input carry. - RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; - } else { - // If the high-bits of this ADD are not demanded, then it does not demand - // the high bits of its LHS or RHS. - if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this ADD to demand the most - // significant bit and all those below it. - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; + // Next, add all of the operands to the PHI. + if (SelectInst *SI = dyn_cast(&I)) { + // We only currently try to fold the condition of a select when it is a phi, + // not the true/false values. + Value *TrueV = SI->getTrueValue(); + Value *FalseV = SI->getFalseValue(); + BasicBlock *PhiTransBB = PN->getParent(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + BasicBlock *ThisBB = PN->getIncomingBlock(i); + Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); + Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); + Value *InV = 0; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, + "phitmp", NonConstBB->getTerminator()); + Worklist.Add(cast(InV)); } + NewPN->addIncoming(InV, ThisBB); } - break; - } - case Instruction::Sub: - // If the high-bits of this SUB are not demanded, then it does not demand - // the high bits of its LHS or RHS. - if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this SUB to demand the most - // significant bit and all those below it. - uint32_t NLZ = DemandedMask.countLeadingZeros(); - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - } - // Otherwise just hand the sub off to ComputeMaskedBits to fill in - // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero <<= ShiftAmt; - RHSKnownOne <<= ShiftAmt; - // low bits known zero. - if (ShiftAmt) - RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. - APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - if (ShiftAmt) { - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero |= HighBits; // high bits known zero. + } else if (I.getNumOperands() == 2) { + Constant *C = cast(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + if (CmpInst *CI = dyn_cast(&I)) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else + InV = ConstantExpr::get(I.getOpcode(), InC, C); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + if (BinaryOperator *BO = dyn_cast(&I)) + InV = BinaryOperator::Create(BO->getOpcode(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else if (CmpInst *CI = dyn_cast(&I)) + InV = CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else + llvm_unreachable("Unknown binop!"); + + Worklist.Add(cast(InV)); } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - break; - case Instruction::AShr: - // If this is an arithmetic shift right and only the low-bit is set, we can - // always convert this into a logical shr, even if the shift amount is - // variable. The low bit of the shift cannot be an input sign bit unless - // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), I->getOperand(1), I->getName()); - return InsertNewInstBefore(NewVal, *I); - } - - // If the sign bit is the only bit demanded by this ashr, then there is no - // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) - return I->getOperand(0); - - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Signed shift right. - APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as - // demanded. - if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - - // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); - // Adjust to where it is now in the mask. - SignBit = APIntOps::lshr(SignBit, ShiftAmt); - - // If the input sign bit is known to be zero, or if none of the top bits - // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA, I->getName()); - return InsertNewInstBefore(NewVal, *I); - } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. - RHSKnownOne |= HighBits; + } else { + CastInst *CI = cast(&I); + const Type *RetTy = CI->getType(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp", + NonConstBB->getTerminator()); + Worklist.Add(cast(InV)); } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - break; - case Instruction::SRem: - if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { - APInt RA = Rem->getValue().abs(); - if (RA.isPowerOf2()) { - if (DemandedMask.ult(RA)) // srem won't affect demanded bits - return I->getOperand(0); + } + return ReplaceInstUsesWith(I, NewPN); +} + + +/// WillNotOverflowSignedAdd - Return true if we can prove that: +/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) +/// This basically requires proving that the add in the original type would not +/// overflow to change the sign bit or have a carry out. +bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here are some simple + // ones. + + // Add has the property that adding any two 2's complement numbers can only + // have one carry bit which can change a sign. As such, if LHS and RHS each + // have at least two sign bits, we know that the addition of the two values + // will sign extend fine. + if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) + return true; + + + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. + + return false; +} - APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - LHSKnownZero |= ~LowBits; +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - KnownZero |= LHSKnownZero & DemandedMask; + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - } + + if (Constant *RHSC = dyn_cast(RHS)) { + if (ConstantInt *CI = dyn_cast(RHSC)) { + // X + (signbit) --> X ^ signbit + const APInt& Val = CI->getValue(); + uint32_t BitWidth = Val.getBitWidth(); + if (Val == APInt::getSignBit(BitWidth)) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? C + 1 : C + if (ZExtInst *ZI = dyn_cast(LHS)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); } - break; - case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, - KnownZero2, KnownOne2, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) - return I; - unsigned Leaders = KnownZero2.countLeadingOnes(); - Leaders = std::max(Leaders, - KnownZero2.countLeadingOnes()); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; - break; - } - case Instruction::Call: - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: { - // If the only bits demanded come from one byte of the bswap result, - // just shift the input byte into position to eliminate the bswap. - unsigned NLZ = DemandedMask.countLeadingZeros(); - unsigned NTZ = DemandedMask.countTrailingZeros(); - - // Round NTZ down to the next byte. If we have 11 trailing zeros, then - // we need all the bits down to bit 8. Likewise, round NLZ. If we - // have 14 leading zeros, round to 8. - NLZ &= ~7; - NTZ &= ~7; - // If we need exactly one byte, we can do this transformation. - if (BitWidth-NLZ-NTZ == 8) { - unsigned ResultBit = NTZ; - unsigned InputBit = BitWidth-NTZ-8; - - // Replace this with either a left or right shift to get the byte into - // the right place. - Instruction *NewVal; - if (InputBit > ResultBit) - NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - ConstantInt::get(I->getType(), InputBit-ResultBit)); - else - NewVal = BinaryOperator::CreateShl(I->getOperand(1), - ConstantInt::get(I->getType(), ResultBit-InputBit)); - NewVal->takeName(I); - return InsertNewInstBefore(NewVal, *I); + if (isa(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + ConstantInt *XorRHS = 0; + Value *XorLHS = 0; + if (isa(RHSC) && + match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); + const APInt& RHSVal = cast(RHSC)->getValue(); + + uint32_t Size = TySizeBits / 2; + APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); + APInt CFF80Val(-C0080Val); + do { + if (TySizeBits > Size) { + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. + if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || + (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { + // This is a sign extend if the top bits are known zero. + if (!MaskedValueIsZero(XorLHS, + APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) + Size = 0; // Not a sign ext, but can't be any others either. + break; + } } - - // TODO: Could compute known zero/one bits based on the input. - break; + Size >>= 1; + C0080Val = APIntOps::lshr(C0080Val, Size); + CFF80Val = APIntOps::ashr(CFF80Val, Size); + } while (Size >= 1); + + // FIXME: This shouldn't be necessary. When the backends can handle types + // with funny bit widths then this switch statement should be removed. It + // is just here to get the size of the "middle" type back up to something + // that the back ends can handle. + const Type *MiddleType = 0; + switch (Size) { + default: break; + case 32: + case 16: + case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; } + if (MiddleType) { + Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); + return new SExtInst(NewTrunc, I.getType(), I.getName()); } } - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; } - - // If the client is only demanding bits that we know, return the known - // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne); - return false; -} + if (I.getType() == Type::getInt1Ty(I.getContext())) + return BinaryOperator::CreateXor(LHS, RHS); -/// SimplifyDemandedVectorElts - The specified value produces a vector with -/// any number of elements. DemandedElts contains the set of elements that are -/// actually used by the caller. This method analyzes which elements of the -/// operand are undef and returns that information in UndefElts. -/// -/// If the information about demanded elements can be used to simplify the -/// operation, the operation is simplified, then the resultant value is -/// returned. This returns null if no change was made. -Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, - unsigned Depth) { - unsigned VWidth = cast(V->getType())->getNumElements(); - APInt EltMask(APInt::getAllOnesValue(VWidth)); - assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); - - if (isa(V)) { - // If the entire vector is undefined, just return this info. - UndefElts = EltMask; - return 0; - } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. - UndefElts = EltMask; - return UndefValue::get(V->getType()); - } - - UndefElts = 0; - if (ConstantVector *CP = dyn_cast(V)) { - const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Undef = UndefValue::get(EltTy); - - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) - if (!DemandedElts[i]) { // If not demanded, set to undef. - Elts.push_back(Undef); - UndefElts.set(i); - } else if (isa(CP->getOperand(i))) { // Already undef. - Elts.push_back(Undef); - UndefElts.set(i); - } else { // Otherwise, defined. - Elts.push_back(CP->getOperand(i)); - } + // X + X --> X << 1 + if (I.getType()->isInteger()) { + if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) + return Result; - // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CP ? NewCP : 0; - } else if (isa(V)) { - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. - - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts == ((1ULL << VWidth) -1)) - return 0; - - const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elt = DemandedElts[i] ? Zero : Undef; - Elts.push_back(Elt); + if (Instruction *RHSI = dyn_cast(RHS)) { + if (RHSI->getOpcode() == Instruction::Sub) + if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B + return ReplaceInstUsesWith(I, RHSI->getOperand(0)); + } + if (Instruction *LHSI = dyn_cast(LHS)) { + if (LHSI->getOpcode() == Instruction::Sub) + if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B + return ReplaceInstUsesWith(I, LHSI->getOperand(0)); } - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); } - - // Limit search depth. - if (Depth == 10) - return 0; - // If multiple users are using the root value, procede with - // simplification conservatively assuming that all elements - // are needed. - if (!V->hasOneUse()) { - // Quit if we find multiple users of a non-root value though. - // They'll be handled when it's their turn to be visited by - // the main instcombine process. - if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. - return 0; + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castNegVal(LHS)) { + if (LHS->getType()->isIntOrIntVector()) { + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } + } + + return BinaryOperator::CreateSub(RHS, LHSV); + } + + // A + -B --> A - B + if (!isa(RHS)) + if (Value *V = dyn_castNegVal(RHS)) + return BinaryOperator::CreateSub(LHS, V); + + + ConstantInt *C2; + if (Value *X = dyn_castFoldableMul(LHS, C2)) { + if (X == RHS) // X*C + X --> X * (C+1) + return BinaryOperator::CreateMul(RHS, AddOne(C2)); - // Conservatively assume that all elements are needed. - DemandedElts = EltMask; + // X*C1 + X*C2 --> X * (C1+C2) + ConstantInt *C1; + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); } + + // X + X*C --> X * (C+1) + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); + + // X + ~X --> -1 since ~X = -X-1 + if (dyn_castNotVal(LHS) == RHS || + dyn_castNotVal(RHS) == LHS) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - Instruction *I = dyn_cast(V); - if (!I) return 0; // Only analyze instructions. + + // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 + if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) + if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) + return R; - bool MadeChange = false; - APInt UndefElts2(VWidth, 0); - Value *TmpV; - switch (I->getOpcode()) { - default: break; - - case Instruction::InsertElement: { - // If this is a variable index, we don't know which element it overwrites. - // demand exactly the same input as we produce. - ConstantInt *Idx = dyn_cast(I->getOperand(2)); - if (Idx == 0) { - // Note that we can't propagate undef elt info, because we don't know - // which elt is getting updated. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - break; - } - - // If this is inserting an element that isn't demanded, remove this - // insertelement. - unsigned IdxNo = Idx->getZExtValue(); - if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { - Worklist.Add(I); - return I->getOperand(0); + // A+B --> A|B iff A and B have no bits set in common. + if (const IntegerType *IT = dyn_cast(I.getType())) { + APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + if (LHSKnownZero != 0) { + APInt RHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // No bits in common -> bitwise or. + if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateOr(LHS, RHS); } - - // Otherwise, the element inserted overwrites whatever was there, so the - // input demanded set is simpler than the output set. - APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - // The inserted element is defined. - UndefElts.clear(IdxNo); - break; } - case Instruction::ShuffleVector: { - ShuffleVectorInst *Shuffle = cast(I); - uint64_t LHSVWidth = - cast(Shuffle->getOperand(0)->getType())->getNumElements(); - APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); - for (unsigned i = 0; i < VWidth; i++) { - if (DemandedElts[i]) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal != -1u) { - assert(MaskVal < LHSVWidth * 2 && - "shufflevector mask index out of range!"); - if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); - else - RightDemanded.set(MaskVal - LHSVWidth); - } - } - } - APInt UndefElts4(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - APInt UndefElts3(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - bool NewUndefElts = false; - for (unsigned i = 0; i < VWidth; i++) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal == -1u) { - UndefElts.set(i); - } else if (MaskVal < LHSVWidth) { - if (UndefElts4[MaskVal]) { - NewUndefElts = true; - UndefElts.set(i); - } - } else { - if (UndefElts3[MaskVal - LHSVWidth]) { - NewUndefElts = true; - UndefElts.set(i); + // W*X + Y*Z --> W * (X+Z) iff W == Y + if (I.getType()->isIntOrIntVector()) { + Value *W, *X, *Y, *Z; + if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && + match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { + if (W != Y) { + if (W == Z) { + std::swap(Y, Z); + } else if (Y == X) { + std::swap(W, X); + } else if (X == Z) { + std::swap(Y, Z); + std::swap(W, X); } } - } - if (NewUndefElts) { - // Add additional discovered undefs. - std::vector Elts; - for (unsigned i = 0; i < VWidth; ++i) { - if (UndefElts[i]) - Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()), - Shuffle->getMaskValue(i))); + if (W == Y) { + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); + return BinaryOperator::CreateMul(W, NewAdd); } - I->setOperand(2, ConstantVector::get(Elts)); - MadeChange = true; } - break; } - case Instruction::BitCast: { - // Vector->vector casts only. - const VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); - if (!VTy) break; - unsigned InVWidth = VTy->getNumElements(); - APInt InputDemandedElts(InVWidth, 0); - unsigned Ratio; - - if (VWidth == InVWidth) { - // If we are converting from <4 x i32> -> <4 x f32>, we demand the same - // elements as are demanded of us. - Ratio = 1; - InputDemandedElts = DemandedElts; - } else if (VWidth > InVWidth) { - // Untested so far. - break; - - // If there are more elements in the result than there are in the source, - // then an input element is live if any of the corresponding output - // elements are live. - Ratio = VWidth/InVWidth; - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { - if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); + + if (ConstantInt *CRHS = dyn_cast(RHS)) { + Value *X = 0; + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::CreateSub(SubOne(CRHS), X); + + // (X & FF00) + xx00 -> (X+xx00) & FF00 + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); + if (Anded == CRHS) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt& AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); + } } - } else { - // Untested so far. - break; - - // If there are more elements in the source than there are in the result, - // then an input element is live if the corresponding output element is - // live. - Ratio = InVWidth/VWidth; - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); } - - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, - UndefElts2, Depth+1); - if (TmpV) { - I->setOperand(0, TmpV); - MadeChange = true; + + // Try to fold constant add into select arguments. + if (SelectInst *SI = dyn_cast(LHS)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } + + // add (select X 0 (sub n A)) A --> select X A n + { + SelectInst *SI = dyn_cast(LHS); + Value *A = RHS; + if (!SI) { + SI = dyn_cast(RHS); + A = LHS; } - - UndefElts = UndefElts2; - if (VWidth > InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the result than there are in the source, - // then an output element is undef if the corresponding input element is - // undef. - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); - } else if (VWidth < InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the source than there are in the result, - // then a result element is undef if all of the corresponding input - // elements are undef. - UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. + if (SI && SI->hasOneUse()) { + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + Value *N; + + // Can we fold the add into the argument of the select? + // We check both true and false select arguments for a matching subtract. + if (match(FV, m_Zero()) && + match(TV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the true select value. + return SelectInst::Create(SI->getCondition(), N, A); + if (match(TV, m_Zero()) && + match(FV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the false select value. + return SelectInst::Create(SI->getCondition(), A, N); } - break; } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; + + // Check for (add (sext x), y), see if we can merge this into an + // integer add followed by a sext. + if (SExtInst *LHSConv = dyn_cast(LHS)) { + // (add (sext x), cst) --> (sext (add x, cst')) + if (ConstantInt *RHSC = dyn_cast(RHS)) { + Constant *CI = + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new, smaller add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } - case Instruction::Call: { - IntrinsicInst *II = dyn_cast(I); - if (!II) break; - switch (II->getIntrinsicID()) { - default: break; - - // Binary vector operations that work column-wise. A dest element is a - // function of the corresponding input elements from the two inputs. - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_min_ss: - case Intrinsic::x86_sse_max_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_min_sd: - case Intrinsic::x86_sse2_max_sd: - TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } - - // If only the low elt is demanded and this is a scalarizable intrinsic, - // scalarize it now. - if (DemandedElts == 1) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - // TODO: Lower MIN/MAX/ABS/etc - Value *LHS = II->getOperand(1); - Value *RHS = II->getOperand(2); - // Extract the element as scalars. - LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Case stmts out of sync!"); - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS, - II->getName()), *II); - break; - } - - Instruction *New = - InsertElementInst::Create( - UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), - II->getName()); - InsertNewInstBefore(New, *II); - return New; - } + // (add (sext x), (sext y)) --> (sext (add int x, y)) + if (SExtInst *RHSConv = dyn_cast(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of sexts), and if the + // integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); + return new SExtInst(NewAdd, I.getType()); } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; } - break; } - } - return MadeChange ? I : 0; -} - - -/// AssociativeOpt - Perform an optimization on an associative operator. This -/// function is designed to check a chain of associative operators for a -/// potential to apply a certain optimization. Since the optimization may be -/// applicable if the expression was reassociated, this checks the chain, then -/// reassociates the expression as necessary to expose the optimization -/// opportunity. This makes use of a special Functor, which must define -/// 'shouldApply' and 'apply' methods. -/// -template -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { - unsigned Opcode = Root.getOpcode(); - Value *LHS = Root.getOperand(0); - // Quick check, see if the immediate LHS matches... - if (F.shouldApply(LHS)) - return F.apply(Root); + return Changed ? &I : 0; +} - // Otherwise, if the LHS is not of the same opcode as the root, return. - Instruction *LHSI = dyn_cast(LHS); - while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) { - // Should we apply this transform to the RHS? - bool ShouldApply = F.shouldApply(LHSI->getOperand(1)); +Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - // If not to the RHS, check to see if we should apply to the LHS... - if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) { - cast(LHSI)->swapOperands(); // Make the LHS the RHS - ShouldApply = true; + if (Constant *RHSC = dyn_cast(RHS)) { + // X + 0 --> X + if (ConstantFP *CFP = dyn_cast(RHSC)) { + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) + return ReplaceInstUsesWith(I, LHS); } - // If the functor wants to apply the optimization to the RHS of LHSI, - // reassociate the expression from ((? op A) op B) to (? op (A op B)) - if (ShouldApply) { - // Now all of the instructions are in the current basic block, go ahead - // and perform the reassociation. - Instruction *TmpLHSI = cast(Root.getOperand(0)); + if (isa(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } - // First move the selected RHS to the LHS of the root... - Root.setOperand(0, LHSI->getOperand(1)); + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castFNegVal(LHS)) + return BinaryOperator::CreateFSub(RHS, LHSV); - // Make what used to be the LHS of the root be the user of the root... - Value *ExtraOperand = TmpLHSI->getOperand(1); - if (&Root == TmpLHSI) { - Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); - return 0; - } - Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI - TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root - BasicBlock::iterator ARI = &Root; ++ARI; - TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root - ARI = Root; + // A + -B --> A - B + if (!isa(RHS)) + if (Value *V = dyn_castFNegVal(RHS)) + return BinaryOperator::CreateFSub(LHS, V); - // Now propagate the ExtraOperand down the chain of instructions until we - // get to LHSI. - while (TmpLHSI != LHSI) { - Instruction *NextLHSI = cast(TmpLHSI->getOperand(0)); - // Move the instruction to immediately before the chain we are - // constructing to avoid breaking dominance properties. - NextLHSI->moveBefore(ARI); - ARI = NextLHSI; + // Check for X+0.0. Simplify it to X if we know X is not -0.0. + if (ConstantFP *CFP = dyn_cast(RHS)) + if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) + return ReplaceInstUsesWith(I, LHS); - Value *NextOp = NextLHSI->getOperand(1); - NextLHSI->setOperand(1, ExtraOperand); - TmpLHSI = NextLHSI; - ExtraOperand = NextOp; + // Check for (add double (sitofp x), y), see if we can merge this into an + // integer add followed by a promotion. + if (SIToFPInst *LHSConv = dyn_cast(LHS)) { + // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // ... if the constant fits in the integer value. This is useful for things + // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer + // requires a constant pool load, and generally allows the add to be better + // instcombined. + if (ConstantFP *CFP = dyn_cast(RHS)) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + + // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + if (SIToFPInst *RHSConv = dyn_cast(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0),"addconv"); + return new SIToFPInst(NewAdd, I.getType()); } - - // Now that the instructions are reassociated, have the functor perform - // the transformation... - return F.apply(Root); } - - LHSI = dyn_cast(LHSI->getOperand(0)); } - return 0; + + return Changed ? &I : 0; } -namespace { -// AddRHS - Implements: X + X --> X << 1 -struct AddRHS { - Value *RHS; - explicit AddRHS(Value *rhs) : RHS(rhs) {} - bool shouldApply(Value *LHS) const { return LHS == RHS; } - Instruction *apply(BinaryOperator &Add) const { - return BinaryOperator::CreateShl(Add.getOperand(0), - ConstantInt::get(Add.getType(), 1)); - } -}; +/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the +/// code necessary to compute the offset from the base pointer (without adding +/// in the base pointer). Return the result as a signed integer of intptr size. +Value *InstCombiner::EmitGEPOffset(User *GEP) { + TargetData &TD = *getTargetData(); + gep_type_iterator GTI = gep_type_begin(GEP); + const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Value *Result = Constant::getNullValue(IntPtrTy); -// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2) -// iff C1&C2 == 0 -struct AddMaskingAnd { - Constant *C2; - explicit AddMaskingAnd(Constant *c) : C2(c) {} - bool shouldApply(Value *LHS) const { - ConstantInt *C1; - return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && - ConstantExpr::getAnd(C1, C2)->isNullValue(); - } - Instruction *apply(BinaryOperator &Add) const { - return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); - } -}; + // Build a mask for high order bits. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; + ++i, ++GTI) { + Value *Op = *i; + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + if (ConstantInt *OpC = dyn_cast(Op)) { + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + + Result = Builder->CreateAdd(Result, + ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); + continue; + } + + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + Constant *OC = + ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale); + // Emit an add instruction. + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); + continue; + } + // Convert to correct type. + if (Op->getType() != IntPtrTy) + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); + if (Size != 1) { + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); + } + // Emit an add instruction. + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); + } + return Result; } -static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner *IC) { - if (CastInst *CI = dyn_cast(&I)) - return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); - // Figure out if the constant is the left or the right argument. - bool ConstIsRHS = isa(I.getOperand(1)); - Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); - if (Constant *SOC = dyn_cast(SO)) { - if (ConstIsRHS) - return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); - return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); - } - Value *Op0 = SO, *Op1 = ConstOperand; - if (!ConstIsRHS) - std::swap(Op0, Op1); +/// Optimize pointer differences into the same array into a size. Consider: +/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer +/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. +/// +Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, + const Type *Ty) { + assert(TD && "Must have target data info for this"); - if (BinaryOperator *BO = dyn_cast(&I)) - return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName()+".op"); - if (ICmpInst *CI = dyn_cast(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - if (FCmpInst *CI = dyn_cast(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - llvm_unreachable("Unknown binary instruction type!"); + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize + // this. + bool Swapped = false; + GetElementPtrInst *GEP = 0; + ConstantExpr *CstGEP = 0; + + // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". + // For now we require one side to be the base pointer "A" or a constant + // expression derived from it. + if (GetElementPtrInst *LHSGEP = dyn_cast(LHS)) { + // (gep X, ...) - X + if (LHSGEP->getOperand(0) == RHS) { + GEP = LHSGEP; + Swapped = false; + } else if (ConstantExpr *CE = dyn_cast(RHS)) { + // (gep X, ...) - (ce_gep X, ...) + if (CE->getOpcode() == Instruction::GetElementPtr && + LHSGEP->getOperand(0) == CE->getOperand(0)) { + CstGEP = CE; + GEP = LHSGEP; + Swapped = false; + } + } + } + + if (GetElementPtrInst *RHSGEP = dyn_cast(RHS)) { + // X - (gep X, ...) + if (RHSGEP->getOperand(0) == LHS) { + GEP = RHSGEP; + Swapped = true; + } else if (ConstantExpr *CE = dyn_cast(LHS)) { + // (ce_gep X, ...) - (gep X, ...) + if (CE->getOpcode() == Instruction::GetElementPtr && + RHSGEP->getOperand(0) == CE->getOperand(0)) { + CstGEP = CE; + GEP = RHSGEP; + Swapped = true; + } + } + } + + if (GEP == 0) + return 0; + + // Emit the offset of the GEP and an intptr_t. + Value *Result = EmitGEPOffset(GEP); + + // If we had a constant expression GEP on the other side offsetting the + // pointer, subtract it from the offset we have. + if (CstGEP) { + Value *CstOffset = EmitGEPOffset(CstGEP); + Result = Builder->CreateSub(Result, CstOffset); + } + + + // If we have p - gep(p, ...) then we have to negate the result. + if (Swapped) + Result = Builder->CreateNeg(Result, "diff.neg"); + + return Builder->CreateIntCast(Result, Ty, true); } -// FoldOpIntoSelect - Given an instruction with a select as one operand and a -// constant as the other operand, try to fold the binary operator into the -// select arguments. This also works for Cast instructions, which obviously do -// not have a second operand. -static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - InstCombiner *IC) { - // Don't modify shared select instructions - if (!SI->hasOneUse()) return 0; - Value *TV = SI->getOperand(1); - Value *FV = SI->getOperand(2); - if (isa(TV) || isa(FV)) { - // Bool selects with constant operands can be folded to logical ops. - if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0; +Instruction *InstCombiner::visitSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); - Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); + if (Op0 == Op1) // sub X, X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return SelectInst::Create(SI->getCondition(), SelectTrueVal, - SelectFalseVal); + // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. + if (Value *V = dyn_castNegVal(Op1)) { + BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); + Res->setHasNoSignedWrap(I.hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Res; } - return 0; -} - -/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which -/// has a PHI node as operand #0, see if we can fold the instruction into the -/// PHI (which is only possible if all operands to the PHI are constants). -/// -/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms -/// that would normally be unprofitable because they strongly encourage jump -/// threading. -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, - bool AllowAggressive) { - AllowAggressive = false; - PHINode *PN = cast(I.getOperand(0)); - unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || - // We normally only transform phis with a single use, unless we're trying - // hard to make jump threading happen. - (!PN->hasOneUse() && !AllowAggressive)) - return 0; - - - // Check to see if all of the operands of the PHI are simple constants - // (constantint/constantfp/undef). If there is one non-constant value, - // remember the BB it is in. If there is more than one or if *it* is a PHI, - // bail out. We don't do arbitrary constant expressions here because moving - // their computation can be expensive without a cost model. - BasicBlock *NonConstBB = 0; - for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa(PN->getIncomingValue(i)) || - isa(PN->getIncomingValue(i))) { - if (NonConstBB) return 0; // More than one non-const value. - if (isa(PN->getIncomingValue(i))) return 0; // Itself a phi. - NonConstBB = PN->getIncomingBlock(i); - - // If the incoming non-constant value is in I's block, we have an infinite - // loop. - if (NonConstBB == I.getParent()) - return 0; - } + if (isa(Op0)) + return ReplaceInstUsesWith(I, Op0); // undef - X -> undef + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); // X - undef -> undef + if (I.getType() == Type::getInt1Ty(I.getContext())) + return BinaryOperator::CreateXor(Op0, Op1); - // If there is exactly one non-constant value, we can insert a copy of the - // operation in that block. However, if this is a critical edge, we would be - // inserting the computation one some other paths (e.g. inside a loop). Only - // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0 && !AllowAggressive) { - BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return 0; - } + if (ConstantInt *C = dyn_cast(Op0)) { + // Replace (-1 - A) with (~A). + if (C->isAllOnesValue()) + return BinaryOperator::CreateNot(Op1); - // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType(), ""); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); - InsertNewInstBefore(NewPN, *PN); - NewPN->takeName(PN); + // C - ~X == X + (1+C) + Value *X = 0; + if (match(Op1, m_Not(m_Value(X)))) + return BinaryOperator::CreateAdd(X, AddOne(C)); - // Next, add all of the operands to the PHI. - if (SelectInst *SI = dyn_cast(&I)) { - // We only currently try to fold the condition of a select when it is a phi, - // not the true/false values. - Value *TrueV = SI->getTrueValue(); - Value *FalseV = SI->getFalseValue(); - BasicBlock *PhiTransBB = PN->getParent(); - for (unsigned i = 0; i != NumPHIValues; ++i) { - BasicBlock *ThisBB = PN->getIncomingBlock(i); - Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); - Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); - Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - "phitmp", NonConstBB->getTerminator()); - Worklist.Add(cast(InV)); - } - NewPN->addIncoming(InV, ThisBB); - } - } else if (I.getNumOperands() == 2) { - Constant *C = cast(I.getOperand(1)); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast(InV)); + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (C->isZero()) { + if (BinaryOperator *SI = dyn_cast(Op1)) { + if (SI->getOpcode() == Instruction::LShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert AShr. + return BinaryOperator::Create(Instruction::AShr, + SI->getOperand(0), CU, SI->getName()); + } + } + } else if (SI->getOpcode() == Instruction::AShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert LShr. + return BinaryOperator::CreateLShr( + SI->getOperand(0), CU, SI->getName()); + } + } + } } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } else { - CastInst *CI = cast(&I); - const Type *RetTy = CI->getType(); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), "phitmp", - NonConstBB->getTerminator()); - Worklist.Add(cast(InV)); + + // Try to fold constant sub into select arguments. + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + // C - zext(bool) -> bool ? C - 1 : C + if (ZExtInst *ZI = dyn_cast(Op1)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + } + + if (BinaryOperator *Op1I = dyn_cast(Op1)) { + if (Op1I->getOpcode() == Instruction::Add) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(1), + I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(0), + I.getName()); + else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { + if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) + // C1-(X+C2) --> (C1-C2)-X + return BinaryOperator::CreateSub( + ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } - return ReplaceInstUsesWith(I, NewPN); -} + if (Op1I->hasOneUse()) { + // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression + // is not used by anyone else... + // + if (Op1I->getOpcode() == Instruction::Sub) { + // Swap the two operands of the subexpr... + Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); + Op1I->setOperand(0, IIOp1); + Op1I->setOperand(1, IIOp0); -/// WillNotOverflowSignedAdd - Return true if we can prove that: -/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) -/// This basically requires proving that the add in the original type would not -/// overflow to change the sign bit or have a carry out. -bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { - // There are different heuristics we can use for this. Here are some simple - // ones. - - // Add has the property that adding any two 2's complement numbers can only - // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values - // will sign extend fine. - if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) - return true; - - - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. - - // TODO: Implement. - - return false; -} + // Create the new top level add instruction... + return BinaryOperator::CreateAdd(Op0, Op1); + } + // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... + // + if (Op1I->getOpcode() == Instruction::And && + (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { + Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); -Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); + return BinaryOperator::CreateAnd(Op0, NewNot); + } - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), TD)) - return ReplaceInstUsesWith(I, V); + // 0 - (X sdiv C) -> (X sdiv -C) + if (Op1I->getOpcode() == Instruction::SDiv) + if (ConstantInt *CSI = dyn_cast(Op0)) + if (CSI->isZero()) + if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) + return BinaryOperator::CreateSDiv(Op1I->getOperand(0), + ConstantExpr::getNeg(DivRHS)); - - if (Constant *RHSC = dyn_cast(RHS)) { - if (ConstantInt *CI = dyn_cast(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; + // X - X*C --> X * (1-C) + ConstantInt *C2 = 0; + if (dyn_castFoldableMul(Op1I, C2) == Op0) { + Constant *CP1 = + ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), + C2); + return BinaryOperator::CreateMul(Op0, CP1); + } + } + } - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + if (Op0I->getOpcode() == Instruction::Add) { + if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(1)); + else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(0)); + } else if (Op0I->getOpcode() == Instruction::Sub) { + if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y + return BinaryOperator::CreateNeg(Op0I->getOperand(1), + I.getName()); } + } - if (isa(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + ConstantInt *C1; + if (Value *X = dyn_castFoldableMul(Op0, C1)) { + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::CreateMul(Op1, SubOne(C1)); + + ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); + } + + // Optimize pointer differences into the same array into a size. Consider: + // &A[10] - &A[0]: we should compile this to "10". + if (TD) { + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast(RHSC)->getValue(); - - uint32_t Size = TySizeBits / 2; - APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); - APInt CFF80Val(-C0080Val); - do { - if (TySizeBits > Size) { - // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. - // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || - (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { - // This is a sign extend if the top bits are known zero. - if (!MaskedValueIsZero(XorLHS, - APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) - Size = 0; // Not a sign ext, but can't be any others either. - break; - } - } - Size >>= 1; - C0080Val = APIntOps::lshr(C0080Val, Size); - CFF80Val = APIntOps::ashr(CFF80Val, Size); - } while (Size >= 1); - - // FIXME: This shouldn't be necessary. When the backends can handle types - // with funny bit widths then this switch statement should be removed. It - // is just here to get the size of the "middle" type back up to something - // that the back ends can handle. - const Type *MiddleType = 0; - switch (Size) { - default: break; - case 32: - case 16: - case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; - } - if (MiddleType) { - Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); - return new SExtInst(NewTrunc, I.getType(), I.getName()); - } - } + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); } + + return 0; +} - if (I.getType() == Type::getInt1Ty(I.getContext())) - return BinaryOperator::CreateXor(LHS, RHS); +Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // X + X --> X << 1 - if (I.getType()->isInteger()) { - if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) - return Result; + // If this is a 'B = x-(-A)', change to B = x+A... + if (Value *V = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFAdd(Op0, V); - if (Instruction *RHSI = dyn_cast(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); + if (BinaryOperator *Op1I = dyn_cast(Op1)) { + if (Op1I->getOpcode() == Instruction::FAdd) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateFNeg(Op1I->getOperand(1), + I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateFNeg(Op1I->getOperand(0), + I.getName()); } } - // -A + B --> B - A - // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } - } - - return BinaryOperator::CreateSub(RHS, LHSV); - } + return 0; +} - // A + -B --> A - B - if (!isa(RHS)) - if (Value *V = dyn_castNegVal(RHS)) - return BinaryOperator::CreateSub(LHS, V); +Instruction *InstCombiner::visitMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (isa(Op1)) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - ConstantInt *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); + // Simplify mul instructions with a constant RHS. + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantInt *CI = dyn_cast(Op1C)) { - // X*C1 + X*C2 --> X * (C1+C2) - ConstantInt *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); + if (CI->isZero()) + return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 + if (CI->equalsInt(1)) // X * 1 == X + return ReplaceInstUsesWith(I, Op0); + if (CI->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); - // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS) == RHS || - dyn_castNotVal(RHS) == LHS) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - + const APInt& Val = cast(CI)->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + return BinaryOperator::CreateShl(Op0, + ConstantInt::get(Op0->getType(), Val.logBase2())); + } + } else if (isa(Op1C->getType())) { + if (Op1C->isNullValue()) + return ReplaceInstUsesWith(I, Op1C); - // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 - if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) - if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) - return R; - - // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast(I.getType())) { - APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); - if (LHSKnownZero != 0) { - APInt RHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); - - // No bits in common -> bitwise or. - if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) - return BinaryOperator::CreateOr(LHS, RHS); - } - } + if (ConstantVector *Op1V = dyn_cast(Op1C)) { + if (Op1V->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); - // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { - Value *W, *X, *Y, *Z; - if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && - match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { - if (W != Y) { - if (W == Z) { - std::swap(Y, Z); - } else if (Y == X) { - std::swap(W, X); - } else if (X == Z) { - std::swap(Y, Z); - std::swap(W, X); + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantInt *CI = dyn_cast(Splat)) + if (CI->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); } } - - if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); - return BinaryOperator::CreateMul(W, NewAdd); - } } - } + + if (BinaryOperator *Op0I = dyn_cast(Op0)) + if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && + isa(Op0I->getOperand(1)) && isa(Op1C)) { + // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. + Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); + Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); + return BinaryOperator::CreateAdd(Add, C1C2); + + } - if (ConstantInt *CRHS = dyn_cast(RHS)) { - Value *X = 0; - if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS), X); + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; - // (X & FF00) + xx00 -> (X+xx00) & FF00 - if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. - const APInt& AddRHSV = CRHS->getValue(); + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) + return BinaryOperator::CreateMul(Op0v, Op1v); - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); - return BinaryOperator::CreateAnd(NewAdd, C2); - } - } + // (X / Y) * Y = X - (X % Y) + // (X / Y) * -Y = (X % Y) - X + { + Value *Op1C = Op1; + BinaryOperator *BO = dyn_cast(Op0); + if (!BO || + (BO->getOpcode() != Instruction::UDiv && + BO->getOpcode() != Instruction::SDiv)) { + Op1C = Op0; + BO = dyn_cast(Op1); } + Value *Neg = dyn_castNegVal(Op1C); + if (BO && BO->hasOneUse() && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && + (BO->getOpcode() == Instruction::UDiv || + BO->getOpcode() == Instruction::SDiv)) { + Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - // Try to fold constant add into select arguments. - if (SelectInst *SI = dyn_cast(LHS)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - } + // If the division is exact, X % Y is zero. + if (SDivOperator *SDiv = dyn_cast(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } - // add (select X 0 (sub n A)) A --> select X A n - { - SelectInst *SI = dyn_cast(LHS); - Value *A = RHS; - if (!SI) { - SI = dyn_cast(RHS); - A = LHS; - } - if (SI && SI->hasOneUse()) { - Value *TV = SI->getTrueValue(); - Value *FV = SI->getFalseValue(); - Value *N; + Value *Rem; + if (BO->getOpcode() == Instruction::UDiv) + Rem = Builder->CreateURem(Op0BO, Op1BO); + else + Rem = Builder->CreateSRem(Op0BO, Op1BO); + Rem->takeName(BO); - // Can we fold the add into the argument of the select? - // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the true select value. - return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the false select value. - return SelectInst::Create(SI->getCondition(), A, N); + if (Op1BO == Op1C) + return BinaryOperator::CreateSub(Op0BO, Rem); + return BinaryOperator::CreateSub(Rem, Op0BO); } } - // Check for (add (sext x), y), see if we can merge this into an - // integer add followed by a sext. - if (SExtInst *LHSConv = dyn_cast(LHS)) { - // (add (sext x), cst) --> (sext (add x, cst')) - if (ConstantInt *RHSC = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { - // Insert the new, smaller add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI, "addconv"); - return new SExtInst(NewAdd, I.getType()); - } - } + /// i1 mul -> i1 and. + if (I.getType() == Type::getInt1Ty(I.getContext())) + return BinaryOperator::CreateAnd(Op0, Op1); + + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + + // If one of the operands of the multiply is a cast from a boolean value, then + // we know the bool is either zero or one, so this is a 'masking' multiply. + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!isa(I.getType())) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - // (add (sext x), (sext y)) --> (sext (add int x, y)) - if (SExtInst *RHSConv = dyn_cast(RHS)) { - // Only do this if x/y have the same type, if at last one of them has a - // single use (so we don't increase the number of sexts), and if the - // integer add will not overflow. - if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0))) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), "addconv"); - return new SExtInst(NewAdd, I.getType()); - } + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; + + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast, "tmp"); + return BinaryOperator::CreateAnd(V, OtherOp); } } return Changed ? &I : 0; } -Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { +Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Constant *RHSC = dyn_cast(RHS)) { - // X + 0 --> X - if (ConstantFP *CFP = dyn_cast(RHSC)) { - if (CFP->isExactlyValue(ConstantFP::getNegativeZero - (I.getType())->getValueAPF())) - return ReplaceInstUsesWith(I, LHS); + // Simplify mul instructions with a constant RHS... + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantFP *Op1F = dyn_cast(Op1C)) { + // "In IEEE floating point, x*1 is not equivalent to x for nans. However, + // ANSI says we can drop signals, so we can do this anyway." (from GCC) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa(Op1C->getType())) { + if (ConstantVector *Op1V = dyn_cast(Op1C)) { + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantFP *F = dyn_cast(Splat)) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); + } + } } - if (isa(LHS)) + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; } - // -A + B --> B - A - // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castFNegVal(LHS)) - return BinaryOperator::CreateFSub(RHS, LHSV); - - // A + -B --> A - B - if (!isa(RHS)) - if (Value *V = dyn_castFNegVal(RHS)) - return BinaryOperator::CreateFSub(LHS, V); + if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFMul(Op0v, Op1v); - // Check for X+0.0. Simplify it to X if we know X is not -0.0. - if (ConstantFP *CFP = dyn_cast(RHS)) - if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) - return ReplaceInstUsesWith(I, LHS); + return Changed ? &I : 0; +} - // Check for (add double (sitofp x), y), see if we can merge this into an - // integer add followed by a promotion. - if (SIToFPInst *LHSConv = dyn_cast(LHS)) { - // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) - // ... if the constant fits in the integer value. This is useful for things - // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer - // requires a constant pool load, and generally allows the add to be better - // instcombined. - if (ConstantFP *CFP = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); +/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select +/// instruction. +bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { + SelectInst *SI = cast(I.getOperand(1)); + + // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y + int NonNullOperand = -1; + if (Constant *ST = dyn_cast(SI->getOperand(1))) + if (ST->isNullValue()) + NonNullOperand = 2; + // div/rem X, (Cond ? Y : 0) -> div/rem X, Y + if (Constant *ST = dyn_cast(SI->getOperand(2))) + if (ST->isNullValue()) + NonNullOperand = 1; + + if (NonNullOperand == -1) + return false; + + Value *SelectCond = SI->getOperand(0); + + // Change the div/rem to use 'Y' instead of the select. + I.setOperand(1, SI->getOperand(NonNullOperand)); + + // Okay, we know we replace the operand of the div/rem with 'Y' with no + // problem. However, the select, or the condition of the select may have + // multiple uses. Based on our knowledge that the operand must be non-zero, + // propagate the known value for the select into other uses of it, and + // propagate a known value of the condition into its other users. + + // If the select and condition only have a single use, don't bother with this, + // early exit. + if (SI->use_empty() && SelectCond->hasOneUse()) + return true; + + // Scan the current block backward, looking for other uses of SI. + BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); + + while (BBI != BBFront) { + --BBI; + // If we found a call to a function, we can't assume it will return, so + // information from below it cannot be propagated above it. + if (isa(BBI) && !isa(BBI)) + break; + + // Replace uses of the select or its condition with the known values. + for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); + I != E; ++I) { + if (*I == SI) { + *I = SI->getOperand(NonNullOperand); + Worklist.Add(BBI); + } else if (*I == SelectCond) { + *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) : + ConstantInt::getFalse(BBI->getContext()); + Worklist.Add(BBI); } } - // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) - if (SIToFPInst *RHSConv = dyn_cast(RHS)) { - // Only do this if x/y have the same type, if at last one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0))) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0),"addconv"); - return new SIToFPInst(NewAdd, I.getType()); - } - } + // If we past the instruction, quit looking for it. + if (&*BBI == SI) + SI = 0; + if (&*BBI == SelectCond) + SelectCond = 0; + + // If we ran out of things to eliminate, break out of the loop. + if (SelectCond == 0 && SI == 0) + break; + } - - return Changed ? &I : 0; + return true; } -/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the -/// code necessary to compute the offset from the base pointer (without adding -/// in the base pointer). Return the result as a signed integer of intptr size. -static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - Result = IC.Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - // We'll let instcombine(mul) convert this to a shl if possible. - Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); - } +/// This function implements the transforms on div instructions that work +/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is +/// used by the visitors to those instructions. +/// @brief Transforms common to all three div instructions +Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); + // undef / X -> 0 for integer. + // undef / X -> undef for FP (the undef could be a snan). + if (isa(Op0)) { + if (Op0->getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Op0); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } - return Result; -} + // X / undef -> undef + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); -/// EvaluateGEPOffsetExpression - Return a value that can be used to compare -/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we -/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can -/// be complex, and scales are involved. The above expression would also be -/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). -/// This later form is less amenable to optimization though, and we are allowed -/// to generate the first by knowing that pointer arithmetic doesn't overflow. -/// -/// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); + return 0; +} - // Check to see if this gep only has a single variable index. If so, and if - // any constant indices are a multiple of its scale, then we can compute this - // in terms of the scale of the variable index. For example, if the GEP - // implies an offset of "12 + i*4", then we can codegen this as "3 + i", - // because the expression will cross zero at the same point. - unsigned i, e = GEP->getNumOperands(); - int64_t Offset = 0; - for (i = 1; i != e; ++i, ++GTI) { - if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; +/// This function implements the transforms common to both integer division +/// instructions (udiv and sdiv). It is called by the visitors to those integer +/// division instructions. +/// @brief Common integer divide transforms +Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); - } - } else { - // Found our variable index. - break; - } - } - - // If there are no variable indices, we must have a constant offset, just - // evaluate it the general way. - if (i == e) return 0; - - Value *VariableIdx = GEP->getOperand(i); - // Determine the scale factor of the variable element. For example, this is - // 4 if the variable index is into an array of i32. - uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - - // Verify that there are no other variable indices. If so, emit the hard way. - for (++i, ++GTI; i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast(GEP->getOperand(i)); - if (!CI) return 0; - - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); + // (sdiv X, X) --> 1 (udiv X, X) --> 1 + if (Op0 == Op1) { + if (const VectorType *Ty = dyn_cast(I.getType())) { + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); + std::vector Elts(Ty->getNumElements(), CI); + return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); } + + Constant *CI = ConstantInt::get(I.getType(), 1); + return ReplaceInstUsesWith(I, CI); } - // Okay, we know we have a single variable index, which must be a - // pointer/array/vector index. If there is no offset, life is simple, return - // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - if (Offset == 0) { - // Cast to intptrty in case a truncation occurs. If an extension is needed, - // we don't need to bother extending: the extension won't affect where the - // computation crosses zero. - if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), - VariableIdx->getName(), &I); - return VariableIdx; - } - - // Otherwise, there is an index. The computation we will do will be modulo - // the pointer size, so get it. - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + if (Instruction *Common = commonDivTransforms(I)) + return Common; - Offset &= PtrSizeMask; - VariableScale &= PtrSizeMask; - - // To do this transformation, any constant index must be a multiple of the - // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", - // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a - // multiple of the variable scale. - int64_t NewOffs = Offset / (int64_t)VariableScale; - if (Offset != NewOffs*(int64_t)VariableScale) - return 0; + // Handle cases involving: [su]div X, (select Cond, Y, Z) + // This does not apply for fdiv. + if (isa(Op1) && SimplifyDivRemOfSelect(I)) + return &I; - // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); - if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, - VariableIdx->getName(), &I); - Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); -} + if (ConstantInt *RHS = dyn_cast(Op1)) { + // div X, 1 == X + if (RHS->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + // (X / C1) / C2 -> X / (C1*C2) + if (Instruction *LHS = dyn_cast(Op0)) + if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) + if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { + if (MultiplyOverflows(RHS, LHSRHS, + I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + else + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); + } -/// Optimize pointer differences into the same array into a size. Consider: -/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer -/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. -/// -Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { - assert(TD && "Must have target data info for this"); - - // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize - // this. - bool Swapped; - GetElementPtrInst *GEP = 0; - ConstantExpr *CstGEP = 0; - - // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". - // For now we require one side to be the base pointer "A" or a constant - // expression derived from it. - if (GetElementPtrInst *LHSGEP = dyn_cast(LHS)) { - // (gep X, ...) - X - if (LHSGEP->getOperand(0) == RHS) { - GEP = LHSGEP; - Swapped = false; - } else if (ConstantExpr *CE = dyn_cast(RHS)) { - // (gep X, ...) - (ce_gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - LHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = LHSGEP; - Swapped = false; - } - } - } - - if (GetElementPtrInst *RHSGEP = dyn_cast(RHS)) { - // X - (gep X, ...) - if (RHSGEP->getOperand(0) == LHS) { - GEP = RHSGEP; - Swapped = true; - } else if (ConstantExpr *CE = dyn_cast(LHS)) { - // (ce_gep X, ...) - (gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - RHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = RHSGEP; - Swapped = true; - } + if (!RHS->isZero()) { // avoid X udiv 0 + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; } } - - if (GEP == 0) - return 0; - - // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP, *this); - - // If we had a constant expression GEP on the other side offsetting the - // pointer, subtract it from the offset we have. - if (CstGEP) { - Value *CstOffset = EmitGEPOffset(CstGEP, *this); - Result = Builder->CreateSub(Result, CstOffset); - } - - // If we have p - gep(p, ...) then we have to negate the result. - if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); + // 0 / X == 0, we don't need to preserve faults! + if (ConstantInt *LHS = dyn_cast(Op0)) + if (LHS->equalsInt(0)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return Builder->CreateIntCast(Result, Ty, true); -} + // It can't be division by zero, hence it must be division by one. + if (I.getType() == Type::getInt1Ty(I.getContext())) + return ReplaceInstUsesWith(I, Op0); + + if (ConstantVector *Op1V = dyn_cast(Op1)) { + if (ConstantInt *X = cast_or_null(Op1V->getSplatValue())) + // div X, 1 == X + if (X->isOne()) + return ReplaceInstUsesWith(I, Op0); + } + return 0; +} -Instruction *InstCombiner::visitSub(BinaryOperator &I) { +Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; - // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. - if (Value *V = dyn_castNegVal(Op1)) { - BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); - Res->setHasNoSignedWrap(I.hasNoSignedWrap()); - Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return Res; + if (ConstantInt *C = dyn_cast(Op1)) { + // X udiv C^2 -> X >> C + // Check to see if this is an unsigned division with an exact power of 2, + // if so, convert to a right shift. + if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 + return BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + + // X udiv C, where C >= signbit + if (C->getValue().isNegative()) { + Value *IC = Builder->CreateICmpULT( Op0, C); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); + } } - if (isa(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType() == Type::getInt1Ty(I.getContext())) - return BinaryOperator::CreateXor(Op0, Op1); - - if (ConstantInt *C = dyn_cast(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - - // C - ~X == X + (1+C) - Value *X = 0; - if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, AddOne(C)); - - // -(X >>u 31) -> (X >>s 31) - // -(X >>s 31) -> (X >>u 31) - if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. - return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU, SI->getName()); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU, SI->getName()); - } - } - } + // X udiv (C1 << N), where C1 is "1< X >> (N+C2) + if (BinaryOperator *RHSI = dyn_cast(I.getOperand(1))) { + if (RHSI->getOpcode() == Instruction::Shl && + isa(RHSI->getOperand(0))) { + const APInt& C1 = cast(RHSI->getOperand(0))->getValue(); + if (C1.isPowerOf2()) { + Value *N = RHSI->getOperand(1); + const Type *NTy = N->getType(); + if (uint32_t C2 = C1.logBase2()) + N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); + return BinaryOperator::CreateLShr(Op0, N); } } - - // Try to fold constant sub into select arguments. - if (SelectInst *SI = dyn_cast(Op1)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - // C - zext(bool) -> bool ? C - 1 : C - if (ZExtInst *ZI = dyn_cast(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); } + + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) + // where C1&C2 are powers of two. + if (SelectInst *SI = dyn_cast(Op1)) + if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) + if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { + const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); + if (TVA.isPowerOf2() && FVA.isPowerOf2()) { + // Compute the shift amounts + uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); + // Construct the "on true" case of the select + Constant *TC = ConstantInt::get(Op0->getType(), TSA); + Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); + + // Construct the "on false" case of the select + Constant *FC = ConstantInt::get(Op0->getType(), FSA); + Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), - I.getName()); - else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } - } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); + // construct the select instruction and return it. + return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); + } } + return 0; +} - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... - // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); +Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); - return BinaryOperator::CreateAnd(Op0, NewNot); - } + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); + if (ConstantInt *RHS = dyn_cast(Op1)) { + // sdiv X, -1 == -X + if (RHS->isAllOnesValue()) + return BinaryOperator::CreateNeg(Op0); - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } + // sdiv X, C --> ashr X, log2(C) + if (cast(&I)->isExact() && + RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); } - } - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), - I.getName()); - } + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast(Op0)) + if (isa(Sub->getOperand(0)) && + cast(Sub->getOperand(0))->isNullValue() && + Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); } - ConstantInt *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - - // Optimize pointer differences into the same array into a size. Consider: - // &A[10] - &A[0]: we should compile this to "10". - if (TD) { - Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); - - // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a udiv. + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + ConstantInt *ShiftedInt; + if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && + ShiftedInt->getValue().isPowerOf2()) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + } } return 0; } -Instruction *InstCombiner::visitFSub(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); +Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { + return commonDivTransforms(I); +} - // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFAdd(Op0, V); +/// This function implements the transforms on rem instructions that work +/// regardless of the kind of rem instruction it is (urem, srem, or frem). It +/// is used by the visitors to those instructions. +/// @brief Transforms common to all three rem instructions +Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op1I->getOpcode() == Instruction::FAdd) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(0), - I.getName()); - } + if (isa(Op0)) { // undef % X -> 0 + if (I.getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - return 0; -} + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa(Op1) && SimplifyDivRemOfSelect(I)) + return &I; -/// isSignBitCheck - Given an exploded icmp instruction, return true if the -/// comparison only checks the sign bit. If it only checks the sign bit, set -/// TrueIfSigned if the result of the comparison is true when the input value is -/// signed. -static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, - bool &TrueIfSigned) { - switch (pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS->isZero(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS->getValue().isSignBit(); - default: - return false; - } + return 0; } -Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); +/// This function implements the transforms common to both integer remainder +/// instructions (urem and srem). It is called by the visitors to those integer +/// remainder instructions. +/// @brief Common integer remainder transforms +Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // Simplify mul instructions with a constant RHS. - if (Constant *Op1C = dyn_cast(Op1)) { - if (ConstantInt *CI = dyn_cast(Op1C)) { + if (Instruction *common = commonRemTransforms(I)) + return common; - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); + // 0 % X == 0 for integer, we don't need to preserve faults! + if (Constant *LHS = dyn_cast(Op0)) + if (LHS->isNullValue()) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); + if (ConstantInt *RHS = dyn_cast(Op1)) { + // X % 0 == undef, we don't need to preserve faults! + if (RHS->equalsInt(0)) + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); + + if (RHS->equalsInt(1)) // X % 1 == 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - const APInt& Val = cast(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); + if (Instruction *Op0I = dyn_cast(Op0)) { + if (SelectInst *SI = dyn_cast(Op0I)) { + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } else if (isa(Op0I)) { + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; } - } else if (isa(Op1C->getType())) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); - - if (ConstantVector *Op1V = dyn_cast(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - } - } + // See if we can fold away this rem instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa(Op0I->getOperand(1)) && isa(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - - } + } - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; + return 0; +} - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } +Instruction *InstCombiner::visitURem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castNegVal(Op1)) - return BinaryOperator::CreateMul(Op0v, Op1v); + if (Instruction *common = commonIRemTransforms(I)) + return common; + + if (ConstantInt *RHS = dyn_cast(Op1)) { + // X urem C^2 -> X and C + // Check to see if this is an unsigned remainder with an exact power of 2, + // if so, convert to a bitwise and. + if (ConstantInt *C = dyn_cast(RHS)) + if (C->getValue().isPowerOf2()) + return BinaryOperator::CreateAnd(Op0, SubOne(C)); + } - // (X / Y) * Y = X - (X % Y) - // (X / Y) * -Y = (X % Y) - X - { - Value *Op1C = Op1; - BinaryOperator *BO = dyn_cast(Op0); - if (!BO || - (BO->getOpcode() != Instruction::UDiv && - BO->getOpcode() != Instruction::SDiv)) { - Op1C = Op0; - BO = dyn_cast(Op1); + if (Instruction *RHSI = dyn_cast(I.getOperand(1))) { + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + if (RHSI->getOpcode() == Instruction::Shl && + isa(RHSI->getOperand(0))) { + if (cast(RHSI->getOperand(0))->getValue().isPowerOf2()) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); + return BinaryOperator::CreateAnd(Op0, Add); + } } - Value *Neg = dyn_castNegVal(Op1C); - if (BO && BO->hasOneUse() && - (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && - (BO->getOpcode() == Instruction::UDiv || - BO->getOpcode() == Instruction::SDiv)) { - Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); + } - // If the division is exact, X % Y is zero. - if (SDivOperator *SDiv = dyn_cast(BO)) - if (SDiv->isExact()) { - if (Op1BO == Op1C) - return ReplaceInstUsesWith(I, Op0BO); - return BinaryOperator::CreateNeg(Op0BO); + // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) + // where C1&C2 are powers of two. + if (SelectInst *SI = dyn_cast(Op1)) { + if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) + if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { + // STO == 0 and SFO == 0 handled above. + if ((STO->getValue().isPowerOf2()) && + (SFO->getValue().isPowerOf2())) { + Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), + SI->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), + SI->getName()+".f"); + return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); } - - Value *Rem; - if (BO->getOpcode() == Instruction::UDiv) - Rem = Builder->CreateURem(Op0BO, Op1BO); - else - Rem = Builder->CreateSRem(Op0BO, Op1BO); - Rem->takeName(BO); - - if (Op1BO == Op1C) - return BinaryOperator::CreateSub(Op0BO, Rem); - return BinaryOperator::CreateSub(Rem, Op0BO); - } + } } + + return 0; +} - /// i1 mul -> i1 and. - if (I.getType() == Type::getInt1Ty(I.getContext())) - return BinaryOperator::CreateAnd(Op0, Op1); +Instruction *InstCombiner::visitSRem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // X*(1 << Y) --> X << Y - // (1 << Y)*X --> X << Y - { - Value *Y; - if (match(Op0, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op1, Y); - if (match(Op1, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op0, Y); - } + // Handle the integer rem common cases + if (Instruction *Common = commonIRemTransforms(I)) + return Common; - // If one of the operands of the multiply is a cast from a boolean value, then - // we know the bool is either zero or one, so this is a 'masking' multiply. - // X * Y (where Y is 0 or 1) -> X & (0-Y) - if (!isa(I.getType())) { - // -2 is "-1 << 1" so it is all bits set except the low one. - APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - - Value *BoolCast = 0, *OtherOp = 0; - if (MaskedValueIsZero(Op0, Negative2)) - BoolCast = Op0, OtherOp = Op1; - else if (MaskedValueIsZero(Op1, Negative2)) - BoolCast = Op1, OtherOp = Op0; + if (Value *RHSNeg = dyn_castNegVal(Op1)) + if (!isa(RHSNeg) || + (isa(RHSNeg) && + cast(RHSNeg)->getValue().isStrictlyPositive())) { + // X % -Y -> X % Y + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, RHSNeg); + return &I; + } - if (BoolCast) { - Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), - BoolCast, "tmp"); - return BinaryOperator::CreateAnd(V, OtherOp); + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a urem. + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } } - return Changed ? &I : 0; -} + // If it's a constant vector, flip any negative values positive. + if (ConstantVector *RHSV = dyn_cast(Op1)) { + unsigned VWidth = RHSV->getNumOperands(); -Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + bool hasNegative = false; + for (unsigned i = 0; !hasNegative && i != VWidth; ++i) + if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) + if (RHS->getValue().isNegative()) + hasNegative = true; - // Simplify mul instructions with a constant RHS... - if (Constant *Op1C = dyn_cast(Op1)) { - if (ConstantFP *Op1F = dyn_cast(Op1C)) { - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa(Op1C->getType())) { - if (ConstantVector *Op1V = dyn_cast(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); + if (hasNegative) { + std::vector Elts(VWidth); + for (unsigned i = 0; i != VWidth; ++i) { + if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { + if (RHS->getValue().isNegative()) + Elts[i] = cast(ConstantExpr::getNeg(RHS)); + else + Elts[i] = RHS; } } - } - - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + Constant *NewRHSV = ConstantVector::get(Elts); + if (NewRHSV != RHSV) { + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, NewRHSV); + return &I; + } + } } - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); + return 0; +} - return Changed ? &I : 0; +Instruction *InstCombiner::visitFRem(BinaryOperator &I) { + return commonRemTransforms(I); } -/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select -/// instruction. -bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { - SelectInst *SI = cast(I.getOperand(1)); - - // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y - int NonNullOperand = -1; - if (Constant *ST = dyn_cast(SI->getOperand(1))) - if (ST->isNullValue()) - NonNullOperand = 2; - // div/rem X, (Cond ? Y : 0) -> div/rem X, Y - if (Constant *ST = dyn_cast(SI->getOperand(2))) - if (ST->isNullValue()) - NonNullOperand = 1; - - if (NonNullOperand == -1) - return false; - - Value *SelectCond = SI->getOperand(0); - - // Change the div/rem to use 'Y' instead of the select. - I.setOperand(1, SI->getOperand(NonNullOperand)); - - // Okay, we know we replace the operand of the div/rem with 'Y' with no - // problem. However, the select, or the condition of the select may have - // multiple uses. Based on our knowledge that the operand must be non-zero, - // propagate the known value for the select into other uses of it, and - // propagate a known value of the condition into its other users. - - // If the select and condition only have a single use, don't bother with this, - // early exit. - if (SI->use_empty() && SelectCond->hasOneUse()) - return true; - - // Scan the current block backward, looking for other uses of SI. - BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); - - while (BBI != BBFront) { - --BBI; - // If we found a call to a function, we can't assume it will return, so - // information from below it cannot be propagated above it. - if (isa(BBI) && !isa(BBI)) - break; - - // Replace uses of the select or its condition with the known values. - for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); - I != E; ++I) { - if (*I == SI) { - *I = SI->getOperand(NonNullOperand); - Worklist.Add(BBI); - } else if (*I == SelectCond) { - *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) : - ConstantInt::getFalse(BBI->getContext()); - Worklist.Add(BBI); - } - } - - // If we past the instruction, quit looking for it. - if (&*BBI == SI) - SI = 0; - if (&*BBI == SelectCond) - SelectCond = 0; - - // If we ran out of things to eliminate, break out of the loop. - if (SelectCond == 0 && SI == 0) - break; - +/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits +/// are carefully arranged to allow folding of expressions such as: +/// +/// (A < B) | (A > B) --> (A != B) +/// +/// Note that this is only valid if the first and second predicates have the +/// same sign. Is illegal to do: (A u< B) | (A s> B) +/// +/// Three bits are used to represent the condition, as follows: +/// 0 A > B +/// 1 A == B +/// 2 A < B +/// +/// <=> Value Definition +/// 000 0 Always false +/// 001 1 A > B +/// 010 2 A == B +/// 011 3 A >= B +/// 100 4 A < B +/// 101 5 A != B +/// 110 6 A <= B +/// 111 7 Always true +/// +static unsigned getICmpCode(const ICmpInst *ICI) { + switch (ICI->getPredicate()) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + return 0; } - return true; } - -/// This function implements the transforms on div instructions that work -/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is -/// used by the visitors to those instructions. -/// @brief Transforms common to all three div instructions -Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // undef / X -> 0 for integer. - // undef / X -> undef for FP (the undef could be a snan). - if (isa(Op0)) { - if (Op0->getType()->isFPOrFPVector()) - return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); +/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp +/// predicate into a three bit mask. It also returns whether it is an ordered +/// predicate by reference. +static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { + isOrdered = false; + switch (CC) { + case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 + case FCmpInst::FCMP_UNO: return 0; // 000 + case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 + case FCmpInst::FCMP_UGT: return 1; // 001 + case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 + case FCmpInst::FCMP_UEQ: return 2; // 010 + case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 + case FCmpInst::FCMP_UGE: return 3; // 011 + case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 + case FCmpInst::FCMP_ULT: return 4; // 100 + case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 + case FCmpInst::FCMP_UNE: return 5; // 101 + case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 + case FCmpInst::FCMP_ULE: return 6; // 110 + // True -> 7 + default: + // Not expecting FCMP_FALSE and FCMP_TRUE; + llvm_unreachable("Unexpected FCmp predicate!"); + return 0; } - - // X / undef -> undef - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); - - return 0; } -/// This function implements the transforms common to both integer division -/// instructions (udiv and sdiv). It is called by the visitors to those integer -/// division instructions. -/// @brief Common integer divide transforms -Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // (sdiv X, X) --> 1 (udiv X, X) --> 1 - if (Op0 == Op1) { - if (const VectorType *Ty = dyn_cast(I.getType())) { - Constant *CI = ConstantInt::get(Ty->getElementType(), 1); - std::vector Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); - } - - Constant *CI = ConstantInt::get(I.getType(), 1); - return ReplaceInstUsesWith(I, CI); - } - - if (Instruction *Common = commonDivTransforms(I)) - return Common; - - // Handle cases involving: [su]div X, (select Cond, Y, Z) - // This does not apply for fdiv. - if (isa(Op1) && SimplifyDivRemOfSelect(I)) - return &I; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // div X, 1 == X - if (RHS->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - - // (X / C1) / C2 -> X / (C1*C2) - if (Instruction *LHS = dyn_cast(Op0)) - if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) - if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { - if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - else - return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); - } - - if (!RHS->isZero()) { // avoid X udiv 0 - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - } - - // 0 / X == 0, we don't need to preserve faults! - if (ConstantInt *LHS = dyn_cast(Op0)) - if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // It can't be division by zero, hence it must be division by one. - if (I.getType() == Type::getInt1Ty(I.getContext())) - return ReplaceInstUsesWith(I, Op0); - - if (ConstantVector *Op1V = dyn_cast(Op1)) { - if (ConstantInt *X = cast_or_null(Op1V->getSplatValue())) - // div X, 1 == X - if (X->isOne()) - return ReplaceInstUsesWith(I, Op0); +/// getICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or a brand +/// new ICmp instruction. The sign is passed in to determine which kind +/// of predicate to use in the new icmp instruction. +static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { + switch (code) { + default: llvm_unreachable("Illegal ICmp code!"); + case 0: return ConstantInt::getFalse(LHS->getContext()); + case 1: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); + case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); + case 3: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); + case 4: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); + case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); + case 6: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(LHS->getContext()); } - - return 0; } -Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Handle the integer div common cases - if (Instruction *Common = commonIDivTransforms(I)) - return Common; +/// getFCmpValue - This is the complement of getFCmpCode, which turns an +/// opcode and two operands into either a FCmp instruction. isordered is passed +/// in to determine which kind of predicate to use in the new fcmp instruction. +static Value *getFCmpValue(bool isordered, unsigned code, + Value *LHS, Value *RHS) { + switch (code) { + default: llvm_unreachable("Illegal FCmp code!"); + case 0: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); + case 1: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); + case 2: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); + case 3: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); + case 4: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); + case 5: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); + case 6: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(LHS->getContext()); + } +} - if (ConstantInt *C = dyn_cast(Op1)) { - // X udiv C^2 -> X >> C - // Check to see if this is an unsigned division with an exact power of 2, - // if so, convert to a right shift. - if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 - return BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), C->getValue().logBase2())); +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} - // X udiv C, where C >= signbit - if (C->getValue().isNegative()) { - Value *IC = Builder->CreateICmpULT( Op0, C); - return SelectInst::Create(IC, Constant::getNullValue(I.getType()), - ConstantInt::get(I.getType(), 1)); - } +namespace { +// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) +struct FoldICmpLogical { + InstCombiner &IC; + Value *LHS, *RHS; + ICmpInst::Predicate pred; + FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) + : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), + pred(ICI->getPredicate()) {} + bool shouldApply(Value *V) const { + if (ICmpInst *ICI = dyn_cast(V)) + if (PredicatesFoldable(pred, ICI->getPredicate())) + return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || + (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); + return false; } - - // X udiv (C1 << N), where C1 is "1< X >> (N+C2) - if (BinaryOperator *RHSI = dyn_cast(I.getOperand(1))) { - if (RHSI->getOpcode() == Instruction::Shl && - isa(RHSI->getOperand(0))) { - const APInt& C1 = cast(RHSI->getOperand(0))->getValue(); - if (C1.isPowerOf2()) { - Value *N = RHSI->getOperand(1); - const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) - N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); - return BinaryOperator::CreateLShr(Op0, N); - } + Instruction *apply(Instruction &Log) const { + ICmpInst *ICI = cast(Log.getOperand(0)); + if (ICI->getOperand(0) != LHS) { + assert(ICI->getOperand(1) == LHS); + ICI->swapOperands(); // Swap the LHS and RHS of the ICmp } - } - - // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast(Op1)) - if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { - const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); - if (TVA.isPowerOf2() && FVA.isPowerOf2()) { - // Compute the shift amounts - uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); - // Construct the "on true" case of the select - Constant *TC = ConstantInt::get(Op0->getType(), TSA); - Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); - - // Construct the "on false" case of the select - Constant *FC = ConstantInt::get(Op0->getType(), FSA); - Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); - // construct the select instruction and return it. - return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); - } - } - return 0; -} + ICmpInst *RHSICI = cast(Log.getOperand(1)); + unsigned LHSCode = getICmpCode(ICI); + unsigned RHSCode = getICmpCode(RHSICI); + unsigned Code; + switch (Log.getOpcode()) { + case Instruction::And: Code = LHSCode & RHSCode; break; + case Instruction::Or: Code = LHSCode | RHSCode; break; + case Instruction::Xor: Code = LHSCode ^ RHSCode; break; + default: llvm_unreachable("Illegal logical opcode!"); return 0; + } -Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + bool isSigned = RHSICI->isSigned() || ICI->isSigned(); + Value *RV = getICmpValue(isSigned, Code, LHS, RHS); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value... + return IC.ReplaceInstUsesWith(Log, RV); + } +}; +} // end anonymous namespace - // Handle the integer div common cases - if (Instruction *Common = commonIDivTransforms(I)) - return Common; +// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where +// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is +// guaranteed to be a binary operator. +Instruction *InstCombiner::OptAndOp(Instruction *Op, + ConstantInt *OpRHS, + ConstantInt *AndRHS, + BinaryOperator &TheAnd) { + Value *X = Op->getOperand(0); + Constant *Together = 0; + if (!Op->isShift()) + Together = ConstantExpr::getAnd(AndRHS, OpRHS); - if (ConstantInt *RHS = dyn_cast(Op1)) { - // sdiv X, -1 == -X - if (RHS->isAllOnesValue()) - return BinaryOperator::CreateNeg(Op0); + switch (Op->getOpcode()) { + case Instruction::Xor: + if (Op->hasOneUse()) { + // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) + Value *And = Builder->CreateAnd(X, AndRHS); + And->takeName(Op); + return BinaryOperator::CreateXor(And, Together); + } + break; + case Instruction::Or: + if (Together == AndRHS) // (X | C) & C --> C + return ReplaceInstUsesWith(TheAnd, AndRHS); - // sdiv X, C --> ashr X, log2(C) - if (cast(&I)->isExact() && - RHS->getValue().isNonNegative() && - RHS->getValue().isPowerOf2()) { - Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), - RHS->getValue().exactLogBase2()); - return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + if (Op->hasOneUse() && Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); } + break; + case Instruction::Add: + if (Op->hasOneUse()) { + // Adding a one to a single bit bit-field should be turned into an XOR + // of the bit. First thing to check is to see if this AND is with a + // single bit constant. + const APInt &AndRHSV = cast(AndRHS)->getValue(); - // -X/C --> X/-C provided the negation doesn't overflow. - if (SubOperator *Sub = dyn_cast(Op0)) - if (isa(Sub->getOperand(0)) && - cast(Sub->getOperand(0))->isNullValue() && - Sub->hasNoSignedWrap()) - return BinaryOperator::CreateSDiv(Sub->getOperand(1), - ConstantExpr::getNeg(RHS)); - } + // If there is only one bit set. + if (AndRHSV.isPowerOf2()) { + // Ok, at this point, we know that we are masking the result of the + // ADD down to exactly one bit. If the constant we are adding has + // no bits set below this bit, then we can eliminate the ADD. + const APInt& AddRHS = cast(OpRHS)->getValue(); - // If the sign bits of both operands are zero (i.e. we can prove they are - // unsigned inputs), turn this into a udiv. - if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask)) { - if (MaskedValueIsZero(Op1, Mask)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + // Check to see if any bits below the one bit set in AndRHSV are set. + if ((AddRHS & (AndRHSV-1)) == 0) { + // If not, the only thing that can effect the output of the AND is + // the bit specified by AndRHSV. If that bit is set, the effect of + // the XOR is to toggle the bit. If it is clear, then the ADD has + // no effect. + if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + TheAnd.setOperand(0, X); + return &TheAnd; + } else { + // Pull the XOR out of the AND. + Value *NewAnd = Builder->CreateAnd(X, AndRHS); + NewAnd->takeName(Op); + return BinaryOperator::CreateXor(NewAnd, AndRHS); + } + } } } - } - - return 0; -} - -Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); -} + break; -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. -/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa(Op1) && SimplifyDivRemOfSelect(I)) - return &I; - - return 0; -} - -/// This function implements the transforms common to both integer remainder -/// instructions (urem and srem). It is called by the visitors to those integer -/// remainder instructions. -/// @brief Common integer remainder transforms -Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Instruction *common = commonRemTransforms(I)) - return common; - - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // X % 0 == undef, we don't need to preserve faults! - if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - if (Instruction *Op0I = dyn_cast(Op0)) { - if (SelectInst *SI = dyn_cast(Op0I)) { - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - } else if (isa(Op0I)) { - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } + case Instruction::Shl: { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); + ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), + AndRHS->getValue() & ShlMask); - // See if we can fold away this rem instruction. - if (SimplifyDemandedInstructionBits(I)) - return &I; + if (CI->getValue() == ShlMask) { + // Masking out bits that the shift already masks + return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. + } else if (CI != AndRHS) { // Reducing bits set in and. + TheAnd.setOperand(1, CI); + return &TheAnd; } + break; } + case Instruction::LShr: { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! This only applies to + // unsigned shifts, because a signed shr may bring in set bits! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + ConstantInt *CI = ConstantInt::get(Op->getContext(), + AndRHS->getValue() & ShrMask); - return 0; -} - -Instruction *InstCombiner::visitURem(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Instruction *common = commonIRemTransforms(I)) - return common; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // X urem C^2 -> X and C - // Check to see if this is an unsigned remainder with an exact power of 2, - // if so, convert to a bitwise and. - if (ConstantInt *C = dyn_cast(RHS)) - if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C)); - } - - if (Instruction *RHSI = dyn_cast(I.getOperand(1))) { - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) - if (RHSI->getOpcode() == Instruction::Shl && - isa(RHSI->getOperand(0))) { - if (cast(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); - return BinaryOperator::CreateAnd(Op0, Add); - } + if (CI->getValue() == ShrMask) { + // Masking out bits that the shift already masks. + return ReplaceInstUsesWith(TheAnd, Op); + } else if (CI != AndRHS) { + TheAnd.setOperand(1, CI); // Reduce bits set in and cst. + return &TheAnd; } + break; } - - // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast(Op1)) { - if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { - // STO == 0 and SFO == 0 handled above. - if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), - SI->getName()+".t"); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), - SI->getName()+".f"); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } + case Instruction::AShr: + // Signed shr. + // See if this is shifting in some sign extension, then masking it out + // with an and. + if (Op->hasOneUse()) { + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + Constant *C = ConstantInt::get(Op->getContext(), + AndRHS->getValue() & ShrMask); + if (C == AndRHS) { // Masking out bits shifted in. + // (Val ashr C1) & C2 -> (Val lshr C1) & C2 + // Make the argument unsigned. + Value *ShVal = Op->getOperand(0); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } + } + break; } - return 0; } -Instruction *InstCombiner::visitSRem(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Handle the integer rem common cases - if (Instruction *Common = commonIRemTransforms(I)) - return Common; - - if (Value *RHSNeg = dyn_castNegVal(Op1)) - if (!isa(RHSNeg) || - (isa(RHSNeg) && - cast(RHSNeg)->getValue().isStrictlyPositive())) { - // X % -Y -> X % Y - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, RHSNeg); - return &I; - } +/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is +/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// (V-Lo) (ConstantExpr::getICmp((isSigned ? + ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && + "Lo is not <= Hi in range emission code!"); + + if (Inside) { + if (Lo == Hi) // Trivially false. + return new ICmpInst(ICmpInst::ICMP_NE, V, V); - // If the sign bits of both operands are zero (i.e. we can prove they are - // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); + // V >= Min && V < Hi --> V < Hi + if (cast(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); + return new ICmpInst(pred, V, Hi); } - } - - // If it's a constant vector, flip any negative values positive. - if (ConstantVector *RHSV = dyn_cast(Op1)) { - unsigned VWidth = RHSV->getNumOperands(); - bool hasNegative = false; - for (unsigned i = 0; !hasNegative && i != VWidth; ++i) - if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) - hasNegative = true; + // Emit V-Lo CreateAdd(V, NegLo, V->getName()+".off"); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); + return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); + } - if (hasNegative) { - std::vector Elts(VWidth); - for (unsigned i = 0; i != VWidth; ++i) { - if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) - Elts[i] = cast(ConstantExpr::getNeg(RHS)); - else - Elts[i] = RHS; - } - } + if (Lo == Hi) // Trivially true. + return new ICmpInst(ICmpInst::ICMP_EQ, V, V); - Constant *NewRHSV = ConstantVector::get(Elts); - if (NewRHSV != RHSV) { - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, NewRHSV); - return &I; - } - } + // V < Min || V >= Hi -> V > Hi-1 + Hi = SubOne(cast(Hi)); + if (cast(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); + return new ICmpInst(pred, V, Hi); } - return 0; -} - -Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); + // Emit V-Lo >u Hi-1-Lo + // Note that Hi has already had one subtracted from it, above. + ConstantInt *NegLo = cast(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); } -// isOneBitSet - Return true if there is exactly one bit set in the specified -// constant. -static bool isOneBitSet(const ConstantInt *CI) { - return CI->getValue().isPowerOf2(); -} +// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with +// any number of 0s on either side. The 1s are allowed to wrap from LSB to +// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is +// not, since all 1s are not contiguous. +static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { + const APInt& V = Val->getValue(); + uint32_t BitWidth = Val->getType()->getBitWidth(); + if (!APIntOps::isShiftedMask(BitWidth, V)) return false; -// isHighOnes - Return true if the constant is of the form 1+0+. -// This is the same as lowones(~X). -static bool isHighOnes(const ConstantInt *CI) { - return (~CI->getValue() + 1).isPowerOf2(); + // look for the first zero bit after the run of ones + MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); + // look for the first non-zero bit + ME = V.getActiveBits(); + return true; } -/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits -/// are carefully arranged to allow folding of expressions such as: -/// -/// (A < B) | (A > B) --> (A != B) -/// -/// Note that this is only valid if the first and second predicates have the -/// same sign. Is illegal to do: (A u< B) | (A s> B) +/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, +/// where isSub determines whether the operator is a sub. If we can fold one of +/// the following xforms: +/// +/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask +/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 /// -/// Three bits are used to represent the condition, as follows: -/// 0 A > B -/// 1 A == B -/// 2 A < B +/// return (A +/- B). /// -/// <=> Value Definition -/// 000 0 Always false -/// 001 1 A > B -/// 010 2 A == B -/// 011 3 A >= B -/// 100 4 A < B -/// 101 5 A != B -/// 110 6 A <= B -/// 111 7 Always true -/// -static unsigned getICmpCode(const ICmpInst *ICI) { - switch (ICI->getPredicate()) { - // False -> 0 - case ICmpInst::ICMP_UGT: return 1; // 001 - case ICmpInst::ICMP_SGT: return 1; // 001 - case ICmpInst::ICMP_EQ: return 2; // 010 - case ICmpInst::ICMP_UGE: return 3; // 011 - case ICmpInst::ICMP_SGE: return 3; // 011 - case ICmpInst::ICMP_ULT: return 4; // 100 - case ICmpInst::ICMP_SLT: return 4; // 100 - case ICmpInst::ICMP_NE: return 5; // 101 - case ICmpInst::ICMP_ULE: return 6; // 110 - case ICmpInst::ICMP_SLE: return 6; // 110 - // True -> 7 - default: - llvm_unreachable("Invalid ICmp predicate!"); +Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, + ConstantInt *Mask, bool isSub, + Instruction &I) { + Instruction *LHSI = dyn_cast(LHS); + if (!LHSI || LHSI->getNumOperands() != 2 || + !isa(LHSI->getOperand(1))) return 0; + + ConstantInt *N = cast(LHSI->getOperand(1)); + + switch (LHSI->getOpcode()) { + default: return 0; + case Instruction::And: + if (ConstantExpr::getAnd(N, Mask) == Mask) { + // If the AndRHS is a power of two minus one (0+1+), this is simple. + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) + break; + + // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ + // part, we don't need any explicit masks to take them out of A. If that + // is all N is, ignore it. + uint32_t MB = 0, ME = 0; + if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive + uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); + if (MaskedValueIsZero(RHS, Mask)) + break; + } + } + return 0; + case Instruction::Or: + case Instruction::Xor: + // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && ConstantExpr::getAnd(N, Mask)->isNullValue()) + break; return 0; } + + if (isSub) + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); } -/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp -/// predicate into a three bit mask. It also returns whether it is an ordered -/// predicate by reference. -static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { - isOrdered = false; - switch (CC) { - case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 - case FCmpInst::FCMP_UNO: return 0; // 000 - case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 - case FCmpInst::FCMP_UGT: return 1; // 001 - case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 - case FCmpInst::FCMP_UEQ: return 2; // 010 - case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 - case FCmpInst::FCMP_UGE: return 3; // 011 - case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 - case FCmpInst::FCMP_ULT: return 4; // 100 - case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 - case FCmpInst::FCMP_UNE: return 5; // 101 - case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 - case FCmpInst::FCMP_ULE: return 6; // 110 - // True -> 7 - default: - // Not expecting FCMP_FALSE and FCMP_TRUE; - llvm_unreachable("Unexpected FCmp predicate!"); +/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. +Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + Value *Val, *Val2; + ConstantInt *LHSCst, *RHSCst; + ICmpInst::Predicate LHSCC, RHSCC; + + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), + m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), + m_ConstantInt(RHSCst)))) return 0; + + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } } -} - -/// getICmpValue - This is the complement of getICmpCode, which turns an -/// opcode and two operands into either a constant true or false, or a brand -/// new ICmp instruction. The sign is passed in to determine which kind -/// of predicate to use in the new icmp instruction. -static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { - switch (code) { - default: llvm_unreachable("Illegal ICmp code!"); - case 0: return ConstantInt::getFalse(LHS->getContext()); - case 1: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); - case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); - case 3: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); - case 4: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); - case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); - case 6: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(LHS->getContext()); + + // From here on, we only handle: + // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) & (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); } -} -/// getFCmpValue - This is the complement of getFCmpCode, which turns an -/// opcode and two operands into either a FCmp instruction. isordered is passed -/// in to determine which kind of predicate to use in the new fcmp instruction. -static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS) { - switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(LHS->getContext()); - } -} + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and and'ing the result + // together. Because of the above check, we know that we only have + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know + // (from the FoldICmpLogical check above), that the two constants + // are not equal and that the larger constant is on the RHS + assert(LHSCst != RHSCst && "Compares not folded above?"); -/// PredicatesFoldable - Return true if both predicates match sign or if at -/// least one of them is an equality comparison (which is signless). -static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { - return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || - (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || - (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); -} - -namespace { -// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) -struct FoldICmpLogical { - InstCombiner &IC; - Value *LHS, *RHS; - ICmpInst::Predicate pred; - FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) - : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), - pred(ICI->getPredicate()) {} - bool shouldApply(Value *V) const { - if (ICmpInst *ICI = dyn_cast(V)) - if (PredicatesFoldable(pred, ICI->getPredicate())) - return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || - (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); - return false; - } - Instruction *apply(Instruction &Log) const { - ICmpInst *ICI = cast(Log.getOperand(0)); - if (ICI->getOperand(0) != LHS) { - assert(ICI->getOperand(1) == LHS); - ICI->swapOperands(); // Swap the LHS and RHS of the ICmp - } - - ICmpInst *RHSICI = cast(Log.getOperand(1)); - unsigned LHSCode = getICmpCode(ICI); - unsigned RHSCode = getICmpCode(RHSICI); - unsigned Code; - switch (Log.getOpcode()) { - case Instruction::And: Code = LHSCode & RHSCode; break; - case Instruction::Or: Code = LHSCode | RHSCode; break; - case Instruction::Xor: Code = LHSCode ^ RHSCode; break; - default: llvm_unreachable("Illegal logical opcode!"); return 0; - } - - bool isSigned = RHSICI->isSigned() || ICI->isSigned(); - Value *RV = getICmpValue(isSigned, Code, LHS, RHS); - if (Instruction *I = dyn_cast(RV)) - return I; - // Otherwise, it's a constant boolean value... - return IC.ReplaceInstUsesWith(Log, RV); - } -}; -} // end anonymous namespace - -// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where -// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is -// guaranteed to be a binary operator. -Instruction *InstCombiner::OptAndOp(Instruction *Op, - ConstantInt *OpRHS, - ConstantInt *AndRHS, - BinaryOperator &TheAnd) { - Value *X = Op->getOperand(0); - Constant *Together = 0; - if (!Op->isShift()) - Together = ConstantExpr::getAnd(AndRHS, OpRHS); - - switch (Op->getOpcode()) { - case Instruction::Xor: - if (Op->hasOneUse()) { - // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Value *And = Builder->CreateAnd(X, AndRHS); - And->takeName(Op); - return BinaryOperator::CreateXor(And, Together); + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false + case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 + case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 + case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 + return ReplaceInstUsesWith(I, LHS); } - break; - case Instruction::Or: - if (Together == AndRHS) // (X | C) & C --> C - return ReplaceInstUsesWith(TheAnd, AndRHS); - - if (Op->hasOneUse() && Together != OpRHS) { - // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Value *Or = Builder->CreateOr(X, Together); - Or->takeName(Op); - return BinaryOperator::CreateAnd(Or, AndRHS); + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_ULT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + break; // (X != 13 & X u< 15) -> no change + case ICmpInst::ICMP_SLT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + break; // (X != 13 & X s< 15) -> no change + case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 + case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_NE: + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, + ConstantInt::get(Add->getType(), 1)); + } + break; // (X != 13 & X != 15) -> no change } break; - case Instruction::Add: - if (Op->hasOneUse()) { - // Adding a one to a single bit bit-field should be turned into an XOR - // of the bit. First thing to check is to see if this AND is with a - // single bit constant. - const APInt& AndRHSV = cast(AndRHS)->getValue(); - - // If there is only one bit set... - if (isOneBitSet(cast(AndRHS))) { - // Ok, at this point, we know that we are masking the result of the - // ADD down to exactly one bit. If the constant we are adding has - // no bits set below this bit, then we can eliminate the ADD. - const APInt& AddRHS = cast(OpRHS)->getValue(); - - // Check to see if any bits below the one bit set in AndRHSV are set. - if ((AddRHS & (AndRHSV-1)) == 0) { - // If not, the only thing that can effect the output of the AND is - // the bit specified by AndRHSV. If that bit is set, the effect of - // the XOR is to toggle the bit. If it is clear, then the ADD has - // no effect. - if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop - TheAnd.setOperand(0, X); - return &TheAnd; - } else { - // Pull the XOR out of the AND. - Value *NewAnd = Builder->CreateAnd(X, AndRHS); - NewAnd->takeName(Op); - return BinaryOperator::CreateXor(NewAnd, AndRHS); - } - } - } + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 + case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change + break; } break; - - case Instruction::Shl: { - // We know that the AND will not produce any of the bits shifted in, so if - // the anded constant includes them, clear them now! - // - uint32_t BitWidth = AndRHS->getType()->getBitWidth(); - uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); - APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), - AndRHS->getValue() & ShlMask); - - if (CI->getValue() == ShlMask) { - // Masking out bits that the shift already masks - return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. - } else if (CI != AndRHS) { // Reducing bits set in and. - TheAnd.setOperand(1, CI); - return &TheAnd; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false + case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 + case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change + break; } break; - } - case Instruction::LShr: { - // We know that the AND will not produce any of the bits shifted in, so if - // the anded constant includes them, clear them now! This only applies to - // unsigned shifts, because a signed shr may bring in set bits! - // - uint32_t BitWidth = AndRHS->getType()->getBitWidth(); - uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); - APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = ConstantInt::get(Op->getContext(), - AndRHS->getValue() & ShrMask); - - if (CI->getValue() == ShrMask) { - // Masking out bits that the shift already masks. - return ReplaceInstUsesWith(TheAnd, Op); - } else if (CI != AndRHS) { - TheAnd.setOperand(1, CI); // Reduce bits set in and cst. - return &TheAnd; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 + return new ICmpInst(LHSCC, Val, RHSCst); + break; // (X u> 13 & X != 15) -> no change + case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) 13 & X s< 15) -> no change + break; } break; - } - case Instruction::AShr: - // Signed shr. - // See if this is shifting in some sign extension, then masking it out - // with an and. - if (Op->hasOneUse()) { - uint32_t BitWidth = AndRHS->getType()->getBitWidth(); - uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); - APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = ConstantInt::get(Op->getContext(), - AndRHS->getValue() & ShrMask); - if (C == AndRHS) { // Masking out bits shifted in. - // (Val ashr C1) & C2 -> (Val lshr C1) & C2 - // Make the argument unsigned. - Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); - return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); - } + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 + return new ICmpInst(LHSCC, Val, RHSCst); + break; // (X s> 13 & X != 15) -> no change + case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 + return InsertRangeTest(Val, AddOne(LHSCst), + RHSCst, true, true, I); + case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change + break; } break; } + return 0; } - -/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient -/// (V-Lo) (ConstantExpr::getICmp((isSigned ? - ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && - "Lo is not <= Hi in range emission code!"); +Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + } - if (Inside) { - if (Lo == Hi) // Trivially false. - return new ICmpInst(ICmpInst::ICMP_NE, V, V); - - // V >= Min && V < Hi --> V < Hi - if (cast(Lo)->isMinValue(isSigned)) { - ICmpInst::Predicate pred = (isSigned ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); - return new ICmpInst(pred, V, Hi); + // Handle vector zeros. This occurs because the canonical form of + // "fcmp ord x,x" is "fcmp ord x, 0". + if (isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, LHS); + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, RHS); + + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); } - - // Emit V-Lo CreateAdd(V, NegLo, V->getName()+".off"); - Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); } - if (Lo == Hi) // Trivially true. - return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + return 0; +} - // V < Min || V >= Hi -> V > Hi-1 - Hi = SubOne(cast(Hi)); - if (cast(Lo)->isMinValue(isSigned)) { - ICmpInst::Predicate pred = (isSigned ? - ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); - return new ICmpInst(pred, V, Hi); - } - // Emit V-Lo >u Hi-1-Lo - // Note that Hi has already had one subtracted from it, above. - ConstantInt *NegLo = cast(ConstantExpr::getNeg(Lo)); - Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); - Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); -} +Instruction *InstCombiner::visitAnd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with -// any number of 0s on either side. The 1s are allowed to wrap from LSB to -// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is -// not, since all 1s are not contiguous. -static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { - const APInt& V = Val->getValue(); - uint32_t BitWidth = Val->getType()->getBitWidth(); - if (!APIntOps::isShiftedMask(BitWidth, V)) return false; + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // look for the first zero bit after the run of ones - MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); - // look for the first non-zero bit - ME = V.getActiveBits(); - return true; -} + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; -/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, -/// where isSub determines whether the operator is a sub. If we can fold one of -/// the following xforms: -/// -/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask -/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 -/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 -/// -/// return (A +/- B). -/// -Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, - ConstantInt *Mask, bool isSub, - Instruction &I) { - Instruction *LHSI = dyn_cast(LHS); - if (!LHSI || LHSI->getNumOperands() != 2 || - !isa(LHSI->getOperand(1))) return 0; + if (ConstantInt *AndRHS = dyn_cast(Op1)) { + const APInt &AndRHSMask = AndRHS->getValue(); + APInt NotAndRHS(~AndRHSMask); - ConstantInt *N = cast(LHSI->getOperand(1)); + // Optimize a variety of ((val OP C1) & C2) combinations... + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + Value *Op0LHS = Op0I->getOperand(0); + Value *Op0RHS = Op0I->getOperand(1); + switch (Op0I->getOpcode()) { + default: break; + case Instruction::Xor: + case Instruction::Or: + // If the mask is only needed on one incoming arm, push it up. + if (!Op0I->hasOneUse()) break; + + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); + } - switch (LHSI->getOpcode()) { - default: return 0; - case Instruction::And: - if (ConstantExpr::getAnd(N, Mask) == Mask) { - // If the AndRHS is a power of two minus one (0+1+), this is simple. - if ((Mask->getValue().countLeadingZeros() + - Mask->getValue().countPopulation()) == - Mask->getValue().getBitWidth()) + break; + case Instruction::Add: + // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes break; - // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ - // part, we don't need any explicit masks to take them out of A. If that - // is all N is, ignore it. - uint32_t MB = 0, ME = 0; - if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive - uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); - APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); - if (MaskedValueIsZero(RHS, Mask)) - break; + case Instruction::Sub: + // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + + // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS + // has 1's for all bits that the subtraction with A might affect. + if (Op0I->hasOneUse()) { + uint32_t BitWidth = AndRHSMask.getBitWidth(); + uint32_t Zeros = AndRHSMask.countLeadingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); + + ConstantInt *A = dyn_cast(Op0LHS); + if (!(A && A->isZero()) && // avoid infinite recursion. + MaskedValueIsZero(Op0LHS, Mask)) { + Value *NewNeg = Builder->CreateNeg(Op0RHS); + return BinaryOperator::CreateAnd(NewNeg, AndRHS); + } + } + break; + + case Instruction::Shl: + case Instruction::LShr: + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + if (AndRHSMask == 1 && Op0LHS == AndRHS) { + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + return new ZExtInst(NewICmp, I.getType()); + } + break; } - } - return 0; - case Instruction::Or: - case Instruction::Xor: - // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 - if ((Mask->getValue().countLeadingZeros() + - Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && ConstantExpr::getAnd(N, Mask)->isNullValue()) - break; - return 0; - } - - if (isSub) - return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); - return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); -} -/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. -Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { - Value *Val, *Val2; - ConstantInt *LHSCst, *RHSCst; - ICmpInst::Predicate LHSCC, RHSCC; - - // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), - m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), - m_ConstantInt(RHSCst)))) - return 0; - - if (LHSCst == RHSCst && LHSCC == RHSCC) { - // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) - // where C is a power of 2 - if (LHSCC == ICmpInst::ICMP_ULT && - LHSCst->getValue().isPowerOf2()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) + if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) + return Res; + } else if (CastInst *CI = dyn_cast(Op0)) { + // If this is an integer truncation or change from signed-to-unsigned, and + // if the source is an and/or with immediate, transform it. This + // frequently occurs for bitfield accesses. + if (Instruction *CastOp = dyn_cast(CI->getOperand(0))) { + if ((isa(CI) || isa(CI)) && + CastOp->getNumOperands() == 2) + if (ConstantInt *AndCI =dyn_cast(CastOp->getOperand(1))){ + if (CastOp->getOpcode() == Instruction::And) { + // Change: and (cast (and X, C1) to T), C2 + // into : and (cast X to T), trunc_or_bitcast(C1)&C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTruncOrBitCast( + CastOp->getOperand(0), I.getType(), + CastOp->getName()+".shrunk"); + // trunc_or_bitcast(C1)&C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); + } else if (CastOp->getOpcode() == Instruction::Or) { + // Change: and (cast (or X, C1) to T), C2 + // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) + // trunc(C1)&C2 + return ReplaceInstUsesWith(I, AndRHS); + } + } + } } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + + // (~A & ~B) == (~(A | B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + + { + Value *A = 0, *B = 0, *C = 0, *D = 0; + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); - // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) - if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1) { // (A^B)&A -> A&(A^B) + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } else if (B == Op1) { // (A^B)&B -> B&(B^A) + cast(Op0)->swapOperands(); + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } + } + + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == Op0) { // B&(A^B) -> B&(B^A) + cast(Op1)->swapOperands(); + std::swap(A, B); + } + if (A == Op0) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } + + // (A&((~A)|B)) -> A&B + if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || + match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + return BinaryOperator::CreateAnd(A, Op1); + if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || + match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + return BinaryOperator::CreateAnd(A, Op0); } - // From here on, we only handle: - // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; - - // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. - if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || - RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || - LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || - RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; - - // We can't fold (ugt x, C) & (sgt x, C2). - if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; - - // Ensure that the larger constant is on the RHS. - bool ShouldSwap; - if (CmpInst::isSigned(LHSCC) || - (ICmpInst::isEquality(LHSCC) && - CmpInst::isSigned(RHSCC))) - ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); - else - ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + if (ICmpInst *RHS = dyn_cast(Op1)) { + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) + return R; + + if (ICmpInst *LHS = dyn_cast(Op0)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) + return Res; + } + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && + // Only do this if the casts both really cause code to be generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } - if (ShouldSwap) { - std::swap(LHS, RHS); - std::swap(LHSCst, RHSCst); - std::swap(LHSCC, RHSCC); + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast(Op1)) { + if (BinaryOperator *SI0 = dyn_cast(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } } - // At this point, we know we have have two icmp instructions - // comparing a value against two constants and and'ing the result - // together. Because of the above check, we know that we only have - // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know - // (from the FoldICmpLogical check above), that the two constants - // are not equal and that the larger constant is on the RHS - assert(LHSCst != RHSCst && "Compares not folded above?"); + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; + } - switch (LHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 - case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 - case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return ReplaceInstUsesWith(I, LHS); + return Changed ? &I : 0; +} + +/// CollectBSwapParts - Analyze the specified subexpression and see if it is +/// capable of providing pieces of a bswap. The subexpression provides pieces +/// of a bswap if it is proven that each of the non-zero bytes in the output of +/// the expression came from the corresponding "byte swapped" byte in some other +/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then +/// we know that the expression deposits the low byte of %X into the high byte +/// of the bswap result and that all other bytes are zero. This expression is +/// accepted, the high byte of ByteValues is set to X to indicate a correct +/// match. +/// +/// This function returns true if the match was unsuccessful and false if so. +/// On entry to the function the "OverallLeftShift" is a signed integer value +/// indicating the number of bytes that the subexpression is later shifted. For +/// example, if the expression is later right shifted by 16 bits, the +/// OverallLeftShift value would be -2 on entry. This is used to specify which +/// byte of ByteValues is actually being set. +/// +/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding +/// byte is masked to zero by a user. For example, in (X & 255), X will be +/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits +/// this function to working on up to 32-byte (256 bit) values. ByteMask is +/// always in the local (OverallLeftShift) coordinate space. +/// +static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, + SmallVector &ByteValues) { + if (Instruction *I = dyn_cast(V)) { + // If this is an or instruction, it may be an inner node of the bswap. + if (I->getOpcode() == Instruction::Or) { + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues) || + CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, + ByteValues); } - case ICmpInst::ICMP_NE: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_ULT: - if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); - break; // (X != 13 & X u< 15) -> no change - case ICmpInst::ICMP_SLT: - if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); - break; // (X != 13 & X s< 15) -> no change - case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 - case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_NE: - if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, - ConstantInt::get(Add->getType(), 1)); + + // If this is a logical shift by a constant multiple of 8, recurse with + // OverallLeftShift and ByteMask adjusted. + if (I->isLogicalShift() && isa(I->getOperand(1))) { + unsigned ShAmt = + cast(I->getOperand(1))->getLimitedValue(~0U); + // Ensure the shift amount is defined and of a byte value. + if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) + return true; + + unsigned ByteShift = ShAmt >> 3; + if (I->getOpcode() == Instruction::Shl) { + // X << 2 -> collect(X, +2) + OverallLeftShift += ByteShift; + ByteMask >>= ByteShift; + } else { + // X >>u 2 -> collect(X, -2) + OverallLeftShift -= ByteShift; + ByteMask <<= ByteShift; + ByteMask &= (~0U >> (32-ByteValues.size())); } - break; // (X != 13 & X != 15) -> no change - } - break; - case ICmpInst::ICMP_ULT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 - case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SLT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 - case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_UGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 - return new ICmpInst(LHSCC, Val, RHSCst); - break; // (X u> 13 & X != 15) -> no change - case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) 13 & X s< 15) -> no change - break; + + if (OverallLeftShift >= (int)ByteValues.size()) return true; + if (OverallLeftShift <= -(int)ByteValues.size()) return true; + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); } - break; - case ICmpInst::ICMP_SGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 - return new ICmpInst(LHSCC, Val, RHSCst); - break; // (X s> 13 & X != 15) -> no change - case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, true, true, I); - case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change - break; + + // If this is a logical 'and' with a mask that clears bytes, clear the + // corresponding bytes in ByteMask. + if (I->getOpcode() == Instruction::And && + isa(I->getOperand(1))) { + // Scan every byte of the and mask, seeing if the byte is either 0 or 255. + unsigned NumBytes = ByteValues.size(); + APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); + const APInt &AndMask = cast(I->getOperand(1))->getValue(); + + for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { + // If this byte is masked out by a later operation, we don't care what + // the and mask is. + if ((ByteMask & (1 << i)) == 0) + continue; + + // If the AndMask is all zeros for this byte, clear the bit. + APInt MaskB = AndMask & Byte; + if (MaskB == 0) { + ByteMask &= ~(1U << i); + continue; + } + + // If the AndMask is not all ones for this byte, it's not a bytezap. + if (MaskB != Byte) + return true; + + // Otherwise, this byte is kept. + } + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); } - break; } - - return 0; + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap. Some observations: 1) if more than one byte + // is demanded from this input, then it could not be successfully assembled + // into a byteswap. At least one of the two bytes would not be aligned with + // their ultimate destination. + if (!isPowerOf2_32(ByteMask)) return true; + unsigned InputByteNo = CountTrailingZeros_32(ByteMask); + + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 + // is demanded, it needs to go into byte 0 of the result. This means that the + // byte needs to be shifted until it lands in the right byte bucket. The + // shift amount depends on the position: if the byte is coming from the high + // part of the value (e.g. byte 3) then it must be shifted right. If from the + // low part, it must be shifted left. + unsigned DestByteNo = InputByteNo + OverallLeftShift; + if (InputByteNo < ByteValues.size()/2) { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } else { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } + + // If the destination byte value is already defined, the values are or'd + // together, which isn't a bswap (unless it's an or of the same bits). + if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + return true; + ByteValues[DestByteNo] = V; + return false; } -Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { +/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. +/// If so, insert the new bswap intrinsic and return it. +Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { + const IntegerType *ITy = dyn_cast(I.getType()); + if (!ITy || ITy->getBitWidth() % 16 || + // ByteMask only allows up to 32-byte values. + ITy->getBitWidth() > 32*8) + return 0; // Can only bswap pairs of bytes. Can't do vectors. - if (LHS->getPredicate() == FCmpInst::FCMP_ORD && - RHS->getPredicate() == FCmpInst::FCMP_ORD) { - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // false. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); - } + /// ByteValues - For each byte of the result, we keep track of which value + /// defines each byte. + SmallVector ByteValues; + ByteValues.resize(ITy->getBitWidth()/8); - // Handle vector zeros. This occurs because the canonical form of - // "fcmp ord x,x" is "fcmp ord x, 0". - if (isa(LHS->getOperand(1)) && - isa(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + // Try to find all the pieces corresponding to the bswap. + uint32_t ByteMask = ~0U >> (32-ByteValues.size()); + if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) return 0; - } - - Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); - Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); - FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } + // Check to see if all of the bytes come from the same value. + Value *V = ByteValues[0]; + if (V == 0) return 0; // Didn't find a byte? Must be zero. - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - - if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); - - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op1Pred == 0) { - std::swap(LHS, RHS); - std::swap(Op0Pred, Op1Pred); - std::swap(Op0Ordered, Op1Ordered); - } - if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq - // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); - - // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false - if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - // ord && ueq -> ord && (uno || eq) -> oeq - return cast(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); - } - } - - return 0; + // Check to make sure that all of the bytes come from the same value. + for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) + if (ByteValues[i] != V) + return 0; + const Type *Tys[] = { ITy }; + Module *M = I.getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + return CallInst::Create(F, V); } +/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check +/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then +/// we can simplify this expression to "cond ? C : D or B". +static Instruction *MatchSelectFromAndOr(Value *A, Value *B, + Value *C, Value *D) { + // If A is not a select of -1/0, this cannot match. + Value *Cond = 0; + if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) + return 0; -Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyAndInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - if (ConstantInt *AndRHS = dyn_cast(Op1)) { - const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); - - // Optimize a variety of ((val OP C1) & C2) combinations... - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - Value *Op0LHS = Op0I->getOperand(0); - Value *Op0RHS = Op0I->getOperand(1); - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Xor: - case Instruction::Or: - // If the mask is only needed on one incoming arm, push it up. - if (!Op0I->hasOneUse()) break; - - if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { - // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); - } - if (!isa(Op0RHS) && - MaskedValueIsZero(Op0RHS, NotAndRHS)) { - // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); - } - - break; - case Instruction::Add: - // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. - // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes - break; - - case Instruction::Sub: - // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. - // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - - // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS - // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { - uint32_t BitWidth = AndRHSMask.getBitWidth(); - uint32_t Zeros = AndRHSMask.countLeadingZeros(); - APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - - ConstantInt *A = dyn_cast(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. - MaskedValueIsZero(Op0LHS, Mask)) { - Value *NewNeg = Builder->CreateNeg(Op0RHS); - return BinaryOperator::CreateAnd(NewNeg, AndRHS); - } - } - break; - - case Instruction::Shl: - case Instruction::LShr: - // (1 << x) & 1 --> zext(x == 0) - // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); - return new ZExtInst(NewICmp, I.getType()); - } - break; - } - - if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) - if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) - return Res; - } else if (CastInst *CI = dyn_cast(Op0)) { - // If this is an integer truncation or change from signed-to-unsigned, and - // if the source is an and/or with immediate, transform it. This - // frequently occurs for bitfield accesses. - if (Instruction *CastOp = dyn_cast(CI->getOperand(0))) { - if ((isa(CI) || isa(CI)) && - CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast(CastOp->getOperand(1))){ - if (CastOp->getOpcode() == Instruction::And) { - // Change: and (cast (and X, C1) to T), C2 - // into : and (cast X to T), trunc_or_bitcast(C1)&C2 - // This will fold the two constants together, which may allow - // other simplifications. - Value *NewCast = Builder->CreateTruncOrBitCast( - CastOp->getOperand(0), I.getType(), - CastOp->getName()+".shrunk"); - // trunc_or_bitcast(C1)&C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - C3 = ConstantExpr::getAnd(C3, AndRHS); - return BinaryOperator::CreateAnd(NewCast, C3); - } else if (CastOp->getOpcode() == Instruction::Or) { - // Change: and (cast (or X, C1) to T), C2 - // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) - // trunc(C1)&C2 - return ReplaceInstUsesWith(I, AndRHS); - } - } - } - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - - // (~A & ~B) == (~(A | B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(Or); - } - - { - Value *A = 0, *B = 0, *C = 0, *D = 0; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } - } - - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast(Op1)->swapOperands(); - std::swap(A, B); - } - if (A == Op0) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); - } - - // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) - return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) - return BinaryOperator::CreateAnd(A, Op0); - } - - if (ICmpInst *RHS = dyn_cast(Op1)) { - // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (and (cast A), (cast B)) -> (cast (and A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - - // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. - if (BinaryOperator *SI1 = dyn_cast(Op1)) { - if (BinaryOperator *SI0 = dyn_cast(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = - Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), - SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - - return Changed ? &I : 0; -} - -/// CollectBSwapParts - Analyze the specified subexpression and see if it is -/// capable of providing pieces of a bswap. The subexpression provides pieces -/// of a bswap if it is proven that each of the non-zero bytes in the output of -/// the expression came from the corresponding "byte swapped" byte in some other -/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then -/// we know that the expression deposits the low byte of %X into the high byte -/// of the bswap result and that all other bytes are zero. This expression is -/// accepted, the high byte of ByteValues is set to X to indicate a correct -/// match. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bytes that the subexpression is later shifted. For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -2 on entry. This is used to specify which -/// byte of ByteValues is actually being set. -/// -/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding -/// byte is masked to zero by a user. For example, in (X & 255), X will be -/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits -/// this function to working on up to 32-byte (256 bit) values. ByteMask is -/// always in the local (OverallLeftShift) coordinate space. -/// -static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, - SmallVector &ByteValues) { - if (Instruction *I = dyn_cast(V)) { - // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) { - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, - ByteValues) || - CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, - ByteValues); - } - - // If this is a logical shift by a constant multiple of 8, recurse with - // OverallLeftShift and ByteMask adjusted. - if (I->isLogicalShift() && isa(I->getOperand(1))) { - unsigned ShAmt = - cast(I->getOperand(1))->getLimitedValue(~0U); - // Ensure the shift amount is defined and of a byte value. - if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) - return true; - - unsigned ByteShift = ShAmt >> 3; - if (I->getOpcode() == Instruction::Shl) { - // X << 2 -> collect(X, +2) - OverallLeftShift += ByteShift; - ByteMask >>= ByteShift; - } else { - // X >>u 2 -> collect(X, -2) - OverallLeftShift -= ByteShift; - ByteMask <<= ByteShift; - ByteMask &= (~0U >> (32-ByteValues.size())); - } - - if (OverallLeftShift >= (int)ByteValues.size()) return true; - if (OverallLeftShift <= -(int)ByteValues.size()) return true; - - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, - ByteValues); - } - - // If this is a logical 'and' with a mask that clears bytes, clear the - // corresponding bytes in ByteMask. - if (I->getOpcode() == Instruction::And && - isa(I->getOperand(1))) { - // Scan every byte of the and mask, seeing if the byte is either 0 or 255. - unsigned NumBytes = ByteValues.size(); - APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); - const APInt &AndMask = cast(I->getOperand(1))->getValue(); - - for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { - // If this byte is masked out by a later operation, we don't care what - // the and mask is. - if ((ByteMask & (1 << i)) == 0) - continue; - - // If the AndMask is all zeros for this byte, clear the bit. - APInt MaskB = AndMask & Byte; - if (MaskB == 0) { - ByteMask &= ~(1U << i); - continue; - } - - // If the AndMask is not all ones for this byte, it's not a bytezap. - if (MaskB != Byte) - return true; - - // Otherwise, this byte is kept. - } - - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, - ByteValues); - } - } - - // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap. Some observations: 1) if more than one byte - // is demanded from this input, then it could not be successfully assembled - // into a byteswap. At least one of the two bytes would not be aligned with - // their ultimate destination. - if (!isPowerOf2_32(ByteMask)) return true; - unsigned InputByteNo = CountTrailingZeros_32(ByteMask); - - // 2) The input and ultimate destinations must line up: if byte 3 of an i32 - // is demanded, it needs to go into byte 0 of the result. This means that the - // byte needs to be shifted until it lands in the right byte bucket. The - // shift amount depends on the position: if the byte is coming from the high - // part of the value (e.g. byte 3) then it must be shifted right. If from the - // low part, it must be shifted left. - unsigned DestByteNo = InputByteNo + OverallLeftShift; - if (InputByteNo < ByteValues.size()/2) { - if (ByteValues.size()-1-DestByteNo != InputByteNo) - return true; - } else { - if (ByteValues.size()-1-DestByteNo != InputByteNo) - return true; - } - - // If the destination byte value is already defined, the values are or'd - // together, which isn't a bswap (unless it's an or of the same bits). - if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) - return true; - ByteValues[DestByteNo] = V; - return false; -} - -/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. -/// If so, insert the new bswap intrinsic and return it. -Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - const IntegerType *ITy = dyn_cast(I.getType()); - if (!ITy || ITy->getBitWidth() % 16 || - // ByteMask only allows up to 32-byte values. - ITy->getBitWidth() > 32*8) - return 0; // Can only bswap pairs of bytes. Can't do vectors. - - /// ByteValues - For each byte of the result, we keep track of which value - /// defines each byte. - SmallVector ByteValues; - ByteValues.resize(ITy->getBitWidth()/8); - - // Try to find all the pieces corresponding to the bswap. - uint32_t ByteMask = ~0U >> (32-ByteValues.size()); - if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) - return 0; - - // Check to see if all of the bytes come from the same value. - Value *V = ByteValues[0]; - if (V == 0) return 0; // Didn't find a byte? Must be zero. - - // Check to make sure that all of the bytes come from the same value. - for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) - if (ByteValues[i] != V) - return 0; - const Type *Tys[] = { ITy }; - Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - return CallInst::Create(F, V); -} - -/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check -/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then -/// we can simplify this expression to "cond ? C : D or B". -static Instruction *MatchSelectFromAndOr(Value *A, Value *B, - Value *C, Value *D) { - // If A is not a select of -1/0, this cannot match. - Value *Cond = 0; - if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) - return 0; - - // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. - if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) - return SelectInst::Create(Cond, C, B); - if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) - return SelectInst::Create(Cond, C, B); - // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. - if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) - return SelectInst::Create(Cond, C, D); - if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) - return SelectInst::Create(Cond, C, D); - return 0; -} - -/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. -Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { - Value *Val, *Val2; - ConstantInt *LHSCst, *RHSCst; - ICmpInst::Predicate LHSCC, RHSCC; - - // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) - return 0; - - - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) - if (LHSCst == RHSCst && LHSCC == RHSCC && - LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); - } - - // From here on, we only handle: - // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; - - // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. - if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || - RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || - LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || - RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; - - // We can't fold (ugt x, C) | (sgt x, C2). - if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; - - // Ensure that the larger constant is on the RHS. - bool ShouldSwap; - if (CmpInst::isSigned(LHSCC) || - (ICmpInst::isEquality(LHSCC) && - CmpInst::isSigned(RHSCC))) - ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); - else - ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); - - if (ShouldSwap) { - std::swap(LHS, RHS); - std::swap(LHSCst, RHSCst); - std::swap(LHSCC, RHSCC); - } - - // At this point, we know we have have two icmp instructions - // comparing a value against two constants and or'ing the result - // together. Because of the above check, we know that we only have - // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the - // FoldICmpLogical check above), that the two constants are not - // equal. - assert(LHSCst != RHSCst && "Compares not folded above?"); - - switch (LHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst)) { - // (X == 13 | X == 14) -> X-13 CreateAdd(Val, AddCST, Val->getName()+".off"); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); - } - break; // (X == 13 | X == 15) -> no change - case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change - case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change - break; - case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 - case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); - } - break; - case ICmpInst::ICMP_NE: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 - case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 - case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true - case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - } - break; - case ICmpInst::ICMP_ULT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change - break; - case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 - // If RHSCst is [us]MAXINT, it is always false. Not handling - // this can cause overflow. - if (RHSCst->isMaxValue(false)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - false, false, I); - case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SLT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change - break; - case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 - // If RHSCst is [us]MAXINT, it is always false. Not handling - // this can cause overflow. - if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); - case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_UGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 - case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 - case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true - case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change - break; - } - break; - } - return 0; -} - -Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { - if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // true. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - - // Otherwise, no need to compare the two constants, compare the - // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp uno x,x" is "fcmp uno x, 0". - if (isa(LHS->getOperand(1)) && - isa(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - - return 0; - } - - Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); - Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); - FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); - if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op0Ordered == Op1Ordered) { - // If both are ordered or unordered, return a new fcmp with - // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); - if (Instruction *I = dyn_cast(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); - } - } - return 0; -} - -/// FoldOrWithConstants - This helper function folds: -/// -/// ((A | B) & C1) | (B & C2) -/// -/// into: -/// -/// (A & C1) | B -/// -/// when the XOR of the two constants is "all ones" (-1). -Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { - ConstantInt *CI1 = dyn_cast(C); - if (!CI1) return 0; - - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; - - APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; - - if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); - return BinaryOperator::CreateOr(NewOp, V1); - } - - return 0; -} - -Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyOrInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; - // (X & C1) | C2 --> (X | C2) & (C1|C2) - if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateAnd(Or, - ConstantInt::get(I.getContext(), - RHS->getValue() | C1->getValue())); - } - - // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateXor(Or, - ConstantInt::get(I.getContext(), - C1->getValue() & ~RHS->getValue())); - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - Value *A = 0, *B = 0; - ConstantInt *C1 = 0, *C2 = 0; - - // (A | B) | C and A | (B | C) -> bswap if possible. - // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - if (match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_Shift(m_Value(), m_Value())) && - match(Op1, m_Shift(m_Value(), m_Value())))) { - if (Instruction *BSwap = MatchBSwap(I)) - return BSwap; - } - - // (X^C)|Y -> (X|Y)^C iff Y&C == 0 - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op1, C1->getValue())) { - Value *NOr = Builder->CreateOr(A, Op1); - NOr->takeName(Op0); - return BinaryOperator::CreateXor(NOr, C1); - } - - // Y|(X^C) -> (X|Y)^C iff Y&C == 0 - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op0, C1->getValue())) { - Value *NOr = Builder->CreateOr(A, Op0); - NOr->takeName(Op0); - return BinaryOperator::CreateXor(NOr, C1); - } - - // (A & C)|(B & D) - Value *C = 0, *D = 0; - if (match(Op0, m_And(m_Value(A), m_Value(C))) && - match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0, *V3 = 0; - C1 = dyn_cast(C); - C2 = dyn_cast(D); - if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - - // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) - // iff (C1&C2) == 0 and (N&~C1) == 0 - if ((C1->getValue() & C2->getValue()) == 0) { - if (match(A, m_Or(m_Value(V1), m_Value(V2))) && - ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) - (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) - return BinaryOperator::CreateAnd(A, - ConstantInt::get(A->getContext(), - C1->getValue()|C2->getValue())); - // Or commutes, try both ways. - if (match(B, m_Or(m_Value(V1), m_Value(V2))) && - ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) - (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) - return BinaryOperator::CreateAnd(B, - ConstantInt::get(B->getContext(), - C1->getValue()|C2->getValue())); - } - } - - // Check to see if we have any common things being and'ed. If so, find the - // terms for V1 & (V2|V3). - if (isOnlyUse(Op0) || isOnlyUse(Op1)) { - V1 = 0; - if (A == B) // (A & C)|(A & D) == A & (C|D) - V1 = A, V2 = C, V3 = D; - else if (A == D) // (A & C)|(B & A) == A & (B|C) - V1 = A, V2 = B, V3 = C; - else if (C == B) // (A & C)|(C & D) == C & (A|D) - V1 = C, V2 = A, V3 = D; - else if (C == D) // (A & C)|(B & C) == C & (A|B) - V1 = C, V2 = A, V3 = B; - - if (V1) { - Value *Or = Builder->CreateOr(V2, V3, "tmp"); - return BinaryOperator::CreateAnd(V1, Or); - } - } - - // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) - return Match; - - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - } - - // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. - if (BinaryOperator *SI1 = dyn_cast(Op1)) { - if (BinaryOperator *SI0 = dyn_cast(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), - SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(And); - } - - // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) { - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (or (cast A), (cast B)) -> (cast (or A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) { - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa(Op0C->getOperand(0)) || - !isa(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be - // generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - } - } - - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - - return Changed ? &I : 0; -} - -namespace { - -// XorSelf - Implements: X ^ X --> 0 -struct XorSelf { - Value *RHS; - XorSelf(Value *rhs) : RHS(rhs) {} - bool shouldApply(Value *LHS) const { return LHS == RHS; } - Instruction *apply(BinaryOperator &Xor) const { - return &Xor; - } -}; - -} - -Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op1)) { - if (isa(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } - - // xor X, X = 0, even if X is nested in a sequence of Xor's. - if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { - assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - if (isa(I.getType())) - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - - // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (isFreeToInvert(Op0I->getOperand(0)) && - isFreeToInvert(Op0I->getOperand(1))) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } - } - } - } - - - if (ConstantInt *RHS = dyn_cast(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) { - // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (ICmpInst *ICI = dyn_cast(Op0)) - return new ICmpInst(ICI->getInversePredicate(), - ICI->getOperand(0), ICI->getOperand(1)); - - if (FCmpInst *FCI = dyn_cast(Op0)) - return new FCmpInst(FCI->getInversePredicate(), - FCI->getOperand(0), FCI->getOperand(1)); - } - - // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). - if (CastInst *Op0C = dyn_cast(Op0)) { - if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { - if (CI->hasOneUse() && Op0C->hasOneUse()) { - Instruction::CastOps Opcode = Op0C->getOpcode(); - if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHS == ConstantExpr::getCast(Opcode, - ConstantInt::getTrue(I.getContext()), - Op0C->getDestTy()))) { - CI->setPredicate(CI->getInversePredicate()); - return CastInst::Create(Opcode, CI, Op0C->getType()); - } - } - } - } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) - if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { - Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); - Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, - ConstantInt::get(I.getType(), 1)); - return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); - } - - if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { - if (Op0I->getOpcode() == Instruction::Add) { - // ~(X-c) --> (-c-1)-X - if (RHS->isAllOnesValue()) { - Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); - return BinaryOperator::CreateSub( - ConstantExpr::getSub(NegOp0CI, - ConstantInt::get(I.getType(), 1)), - Op0I->getOperand(0)); - } else if (RHS->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) - Constant *C = ConstantInt::get(I.getContext(), - RHS->getValue() + Op0CI->getValue()); - return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); - - } - } else if (Op0I->getOpcode() == Instruction::Or) { - // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 - if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { - Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); - // Anything in both C1 and C2 is known to be zero, remove it from - // NewRHS. - Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); - NewRHS = ConstantExpr::getAnd(NewRHS, - ConstantExpr::getNot(CommonBits)); - Worklist.Add(Op0I); - I.setOperand(0, Op0I->getOperand(0)); - I.setOperand(1, NewRHS); - return &I; - } - } - } - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - - BinaryOperator *Op1I = dyn_cast(Op1); - if (Op1I) { - Value *A, *B; - if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op0) { // B^(B|A) == (A|B)^B - Op1I->swapOperands(); - I.swapOperands(); - std::swap(Op0, Op1); - } else if (B == Op0) { // B^(A|B) == (A|B)^B - I.swapOperands(); // Simplified below. - std::swap(Op0, Op1); - } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && - Op1I->hasOneUse()){ - if (A == Op0) { // A^(A&B) -> A^(B&A) - Op1I->swapOperands(); - std::swap(A, B); - } - if (B == Op0) { // A^(B&A) -> (B&A)^A - I.swapOperands(); // Simplified below. - std::swap(Op0, Op1); - } - } - } - - BinaryOperator *Op0I = dyn_cast(Op0); - if (Op0I) { - Value *A, *B; - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()) { - if (A == Op1) // (B|A)^B == (A|B)^B - std::swap(A, B); - if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()){ - if (A == Op1) // (A&B)^A -> (B&A)^A - std::swap(A, B); - if (B == Op1 && // (B&A)^A == ~B & A - !isa(Op1)) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); - } - } - } - - // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. - if (Op0I && Op1I && Op0I->isShift() && - Op0I->getOpcode() == Op1I->getOpcode() && - Op0I->getOperand(1) == Op1I->getOperand(1) && - (Op1I->hasOneUse() || Op1I->hasOneUse())) { - Value *NewOp = - Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), - Op0I->getName()); - return BinaryOperator::Create(Op1I->getOpcode(), NewOp, - Op1I->getOperand(1)); - } - - if (Op0I && Op1I) { - Value *A, *B, *C, *D; - // (A & B)^(A | B) -> A ^ B - if (match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | B)^(A & B) -> A ^ B - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - - // (A & B)^(C & D) - if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && - match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - // (X & Y)^(X & Y) -> (Y^Z) & X - Value *X = 0, *Y = 0, *Z = 0; - if (A == C) - X = A, Y = B, Z = D; - else if (A == D) - X = A, Y = B, Z = C; - else if (B == C) - X = B, Y = A, Z = D; - else if (B == D) - X = B, Y = A, Z = C; - - if (X) { - Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); - return BinaryOperator::CreateAnd(NewOp, X); - } - } - } - - // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) { - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - } - - return Changed ? &I : 0; -} - -static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { - return cast(ConstantExpr::getExtractElement(V, Idx)); -} - -static bool HasAddOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().ult(In1->getValue()); -} - -/// AddWithOverflow - Compute Result = In1+In2, returning true if the result -/// overflowed for this type. -static bool AddWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, bool IsSigned = false) { - Result = ConstantExpr::getAdd(In1, In2); - - if (const VectorType *VTy = dyn_cast(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); - if (HasAddOverflow(ExtractElement(Result, Idx), - ExtractElement(In1, Idx), - ExtractElement(In2, Idx), - IsSigned)) - return true; - } - return false; - } - - return HasAddOverflow(cast(Result), - cast(In1), cast(In2), - IsSigned); -} - -static bool HasSubOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().ugt(In1->getValue()); -} - -/// SubWithOverflow - Compute Result = In1-In2, returning true if the result -/// overflowed for this type. -static bool SubWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, bool IsSigned = false) { - Result = ConstantExpr::getSub(In1, In2); - - if (const VectorType *VTy = dyn_cast(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); - if (HasSubOverflow(ExtractElement(Result, Idx), - ExtractElement(In1, Idx), - ExtractElement(In2, Idx), - IsSigned)) - return true; - } - return false; - } - - return HasSubOverflow(cast(Result), - cast(In1), cast(In2), - IsSigned); -} - - -/// FoldGEPICmp - Fold comparisons between a GEP instruction and something -/// else. At this point we know that the GEP is on the LHS of the comparison. -Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, - Instruction &I) { - // Look through bitcasts. - if (BitCastInst *BCI = dyn_cast(RHS)) - RHS = BCI->getOperand(0); - - Value *PtrBase = GEPLHS->getOperand(0); - if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { - // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). - // This transformation (ignoring the base and scales) is valid because we - // know pointers can't overflow since the gep is inbounds. See if we can - // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - - // If not, synthesize the offset the hard way. - if (Offset == 0) - Offset = EmitGEPOffset(GEPLHS, *this); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Constant::getNullValue(Offset->getType())); - } else if (GEPOperator *GEPRHS = dyn_cast(RHS)) { - // If the base pointers are different, but the indices are the same, just - // compare the base pointer. - if (PtrBase != GEPRHS->getOperand(0)) { - bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); - IndicesTheSame &= GEPLHS->getOperand(0)->getType() == - GEPRHS->getOperand(0)->getType(); - if (IndicesTheSame) - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - IndicesTheSame = false; - break; - } - - // If all indices are the same, just compare the base pointers. - if (IndicesTheSame) - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), - GEPLHS->getOperand(0), GEPRHS->getOperand(0)); - - // Otherwise, the base pointers are different and the indices are - // different, bail out. - return 0; - } - - // If one of the GEPs has all zero indices, recurse. - bool AllZeros = true; - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (!isa(GEPLHS->getOperand(i)) || - !cast(GEPLHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) - return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), - ICmpInst::getSwappedPredicate(Cond), I); - - // If the other GEP has all zero indices, recurse. - AllZeros = true; - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (!isa(GEPRHS->getOperand(i)) || - !cast(GEPRHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) - return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); - - if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { - // If the GEPs only differ by one index, compare it. - unsigned NumDifferences = 0; // Keep track of # differences. - unsigned DiffOperand = 0; // The operand that differs. - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != - GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { - // Irreconcilable differences. - NumDifferences = 2; - break; - } else { - if (NumDifferences++) break; - DiffOperand = i; - } - } - - if (NumDifferences == 0) // SAME GEP? - return ReplaceInstUsesWith(I, // No comparison is needed here. - ConstantInt::get(Type::getInt1Ty(I.getContext()), - ICmpInst::isTrueWhenEqual(Cond))); - - else if (NumDifferences == 1) { - Value *LHSV = GEPLHS->getOperand(DiffOperand); - Value *RHSV = GEPRHS->getOperand(DiffOperand); - // Make sure we do a signed comparison here. - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); - } - } - - // Only lower this if the icmp is the only user of the GEP or if we expect - // the result to fold to a constant! - if (TD && - (isa(GEPLHS) || GEPLHS->hasOneUse()) && - (isa(GEPRHS) || GEPRHS->hasOneUse())) { - // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) - Value *L = EmitGEPOffset(GEPLHS, *this); - Value *R = EmitGEPOffset(GEPRHS, *this); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); - } - } - return 0; -} - -/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. -/// -Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, - Instruction *LHSI, - Constant *RHSC) { - if (!isa(RHSC)) return 0; - const APFloat &RHS = cast(RHSC)->getValueAPF(); - - // Get the width of the mantissa. We don't want to hack on conversions that - // might lose information from the integer, e.g. "i64 -> float" - int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); - if (MantissaWidth == -1) return 0; // Unknown. - - // Check to see that the input is converted from an integer type that is small - // enough that preserves all bits. TODO: check here for "known" sign bits. - // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. - unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - - // If this is a uitofp instruction, we need an extra bit to hold the sign. - bool LHSUnsigned = isa(LHSI); - if (LHSUnsigned) - ++InputSize; - - // If the conversion would lose info, don't hack on this. - if ((int)InputSize > MantissaWidth) - return 0; - - // Otherwise, we can potentially simplify the comparison. We know that it - // will always come through as an integer value and we know the constant is - // not a NAN (it would have been previously simplified). - assert(!RHS.isNaN() && "NaN comparison not already folded!"); - - ICmpInst::Predicate Pred; - switch (I.getPredicate()) { - default: llvm_unreachable("Unexpected predicate!"); - case FCmpInst::FCMP_UEQ: - case FCmpInst::FCMP_OEQ: - Pred = ICmpInst::ICMP_EQ; - break; - case FCmpInst::FCMP_UGT: - case FCmpInst::FCMP_OGT: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; - break; - case FCmpInst::FCMP_UGE: - case FCmpInst::FCMP_OGE: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; - break; - case FCmpInst::FCMP_ULT: - case FCmpInst::FCMP_OLT: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; - break; - case FCmpInst::FCMP_ULE: - case FCmpInst::FCMP_OLE: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; - break; - case FCmpInst::FCMP_UNE: - case FCmpInst::FCMP_ONE: - Pred = ICmpInst::ICMP_NE; - break; - case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - } - - const IntegerType *IntTy = cast(LHSI->getOperand(0)->getType()); - - // Now we know that the APFloat is a normal number, zero or inf. - - // See if the FP constant is too large for the integer. For example, - // comparing an i8 to 300.0. - unsigned IntWidth = IntTy->getScalarSizeInBits(); - - if (!LHSUnsigned) { - // If the RHS value is > SignedMax, fold the comparison. This handles +INF - // and large values. - APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false); - SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || - Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - } - } else { - // If the RHS value is > UnsignedMax, fold the comparison. This handles - // +INF and large values. - APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false); - UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, - APFloat::rmNearestTiesToEven); - if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || - Pred == ICmpInst::ICMP_ULE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - } - } - - if (!LHSUnsigned) { - // See if the RHS value is < SignedMin. - APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); - SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || - Pred == ICmpInst::ICMP_SGE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - } - } - - // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or - // [0, UMAX], but it may still be fractional. See if it is fractional by - // casting the FP value to the integer value and back, checking for equality. - // Don't do this for zero, because -0.0 is not fractional. - Constant *RHSInt = LHSUnsigned - ? ConstantExpr::getFPToUI(RHSC, IntTy) - : ConstantExpr::getFPToSI(RHSC, IntTy); - if (!RHS.isZero()) { - bool Equal = LHSUnsigned - ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC - : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; - if (!Equal) { - // If we had a comparison against a fractional value, we have to adjust - // the compare predicate and sometimes the value. RHSC is rounded towards - // zero at this point. - switch (Pred) { - default: llvm_unreachable("Unexpected integer comparison!"); - case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - case ICmpInst::ICMP_ULE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> false - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - case ICmpInst::ICMP_SLE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> int < -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SLT; - break; - case ICmpInst::ICMP_ULT: - // (float)int < -4.4 --> false - // (float)int < 4.4 --> int <= 4 - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - Pred = ICmpInst::ICMP_ULE; - break; - case ICmpInst::ICMP_SLT: - // (float)int < -4.4 --> int < -4 - // (float)int < 4.4 --> int <= 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SLE; - break; - case ICmpInst::ICMP_UGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> true - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - break; - case ICmpInst::ICMP_SGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> int >= -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SGE; - break; - case ICmpInst::ICMP_UGE: - // (float)int >= -4.4 --> true - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - Pred = ICmpInst::ICMP_UGT; - break; - case ICmpInst::ICMP_SGE: - // (float)int >= -4.4 --> int >= -4 - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SGT; - break; - } - } - } - - // Lower this FP comparison into an appropriate integer version of the - // comparison. - return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); -} - -/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: -/// cmp pred (load (gep GV, ...)), cmpcst -/// where GV is a global variable with a constant initializer. Try to simplify -/// this into some simple computation that does not need the load. For example -/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". -/// -/// If AndCst is non-null, then the loaded value is masked with that constant -/// before doing the comparison. This handles cases like "A[i]&4 == 0". -Instruction *InstCombiner:: -FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, - CmpInst &ICI, ConstantInt *AndCst) { - ConstantArray *Init = dyn_cast(GV->getInitializer()); - if (Init == 0 || Init->getNumOperands() > 1024) return 0; - - // There are many forms of this optimization we can handle, for now, just do - // the simple index into a single-dimensional array. - // - // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero() || - isa(GEP->getOperand(2))) - return 0; - - // Check that indices after the variable are constants and in-range for the - // type they index. Collect the indices. This is typically for arrays of - // structs. - SmallVector LaterIndices; - - const Type *EltTy = cast(Init->getType())->getElementType(); - for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { - ConstantInt *Idx = dyn_cast(GEP->getOperand(i)); - if (Idx == 0) return 0; // Variable index. - - uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast(EltTy)) - EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return 0; - EltTy = ATy->getElementType(); - } else { - return 0; // Unknown type. - } - - LaterIndices.push_back(IdxVal); - } - - enum { Overdefined = -3, Undefined = -2 }; - - // Variables for our state machines. - - // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form - // "i == 47 | i == 87", where 47 is the first index the condition is true for, - // and 87 is the second (and last) index. FirstTrueElement is -2 when - // undefined, otherwise set to the first true element. SecondTrueElement is - // -2 when undefined, -3 when overdefined and >= 0 when that index is true. - int FirstTrueElement = Undefined, SecondTrueElement = Undefined; - - // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the - // form "i != 47 & i != 87". Same state transitions as for true elements. - int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - - /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these - /// define a state machine that triggers for ranges of values that the index - /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. - /// This is -2 when undefined, -3 when overdefined, and otherwise the last - /// index in the range (inclusive). We use -2 for undefined here because we - /// use relative comparisons and don't want 0-1 to match -1. - int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - - // MagicBitvector - This is a magic bitvector where we set a bit if the - // comparison is true for element 'i'. If there are 64 elements or less in - // the array, this will fully represent all the comparison results. - uint64_t MagicBitvector = 0; - - - // Scan the array and see if one of our patterns matches. - Constant *CompareRHS = cast(ICI.getOperand(1)); - for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { - Constant *Elt = Init->getOperand(i); - - // If this is indexing an array of structures, get the structure element. - if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); - - // If the element is masked, handle it. - if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - - // Find out if the comparison would be true or false for the i'th element. - Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, TD); - // If the result is undef for this element, ignore it. - if (isa(C)) { - // Extend range state machines to cover this element in case there is an - // undef in the middle of the range. - if (TrueRangeEnd == (int)i-1) - TrueRangeEnd = i; - if (FalseRangeEnd == (int)i-1) - FalseRangeEnd = i; - continue; - } - - // If we can't compute the result for any of the elements, we have to give - // up evaluating the entire conditional. - if (!isa(C)) return 0; - - // Otherwise, we know if the comparison is true or false for this element, - // update our state machines. - bool IsTrueForElt = !cast(C)->isZero(); - - // State machine for single/double/range index comparison. - if (IsTrueForElt) { - // Update the TrueElement state machine. - if (FirstTrueElement == Undefined) - FirstTrueElement = TrueRangeEnd = i; // First true element. - else { - // Update double-compare state machine. - if (SecondTrueElement == Undefined) - SecondTrueElement = i; - else - SecondTrueElement = Overdefined; - - // Update range state machine. - if (TrueRangeEnd == (int)i-1) - TrueRangeEnd = i; - else - TrueRangeEnd = Overdefined; - } - } else { - // Update the FalseElement state machine. - if (FirstFalseElement == Undefined) - FirstFalseElement = FalseRangeEnd = i; // First false element. - else { - // Update double-compare state machine. - if (SecondFalseElement == Undefined) - SecondFalseElement = i; - else - SecondFalseElement = Overdefined; - - // Update range state machine. - if (FalseRangeEnd == (int)i-1) - FalseRangeEnd = i; - else - FalseRangeEnd = Overdefined; - } - } - - - // If this element is in range, update our magic bitvector. - if (i < 64 && IsTrueForElt) - MagicBitvector |= 1ULL << i; - - // If all of our states become overdefined, bail out early. Since the - // predicate is expensive, only check it every 8 elements. This is only - // really useful for really huge arrays. - if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && - SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && - FalseRangeEnd == Overdefined) - return 0; - } - - // Now that we've scanned the entire array, emit our new comparison(s). We - // order the state machines in complexity of the generated code. - Value *Idx = GEP->getOperand(2); - - - // If the comparison is only true for one or two elements, emit direct - // comparisons. - if (SecondTrueElement != Overdefined) { - // None true -> false. - if (FirstTrueElement == Undefined) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - - Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - - // True for one element -> 'i == 47'. - if (SecondTrueElement == Undefined) - return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - - // True for two elements -> 'i == 47 | i == 72'. - Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); - Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); - Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); - return BinaryOperator::CreateOr(C1, C2); - } - - // If the comparison is only false for one or two elements, emit direct - // comparisons. - if (SecondFalseElement != Overdefined) { - // None false -> true. - if (FirstFalseElement == Undefined) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - - Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); - - // False for one element -> 'i != 47'. - if (SecondFalseElement == Undefined) - return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - - // False for two elements -> 'i != 47 & i != 72'. - Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); - Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); - Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); - return BinaryOperator::CreateAnd(C1, C2); - } - - // If the comparison can be replaced with a range comparison for the elements - // where it is true, emit the range check. - if (TrueRangeEnd != Overdefined) { - assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - - // Generate (i-FirstTrue) getType(), -FirstTrueElement); - Idx = Builder->CreateAdd(Idx, Offs); - } - - Value *End = ConstantInt::get(Idx->getType(), - TrueRangeEnd-FirstTrueElement+1); - return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); - } - - // False range check. - if (FalseRangeEnd != Overdefined) { - assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); - // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). - if (FirstFalseElement) { - Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); - Idx = Builder->CreateAdd(Idx, Offs); - } - - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); - return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); - } - - - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state - // of this load, replace it with computation that does: - // ((magic_cst >> i) & 1) != 0 - if (Init->getNumOperands() <= 32 || - (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; - if (Init->getNumOperands() <= 32) - Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); - } - - return 0; -} - - -Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { - I.swapOperands(); - Changed = true; - } - - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // Simplify 'fcmp pred X, X' - if (Op0 == Op1) { - switch (I.getPredicate()) { - default: llvm_unreachable("Unknown predicate!"); - case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) - case FCmpInst::FCMP_ULT: // True if unordered or less than - case FCmpInst::FCMP_UGT: // True if unordered or greater than - case FCmpInst::FCMP_UNE: // True if unordered or not equal - // Canonicalize these to be 'fcmp uno %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_UNO); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - - case FCmpInst::FCMP_ORD: // True if ordered (no nans) - case FCmpInst::FCMP_OEQ: // True if ordered and equal - case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal - case FCmpInst::FCMP_OLE: // True if ordered and less than or equal - // Canonicalize these to be 'fcmp ord %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_ORD); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - } - } - - // Handle fcmp with constant RHS - if (Constant *RHSC = dyn_cast(Op1)) { - if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::PHI: - // Only fold fcmp into the PHI if the phi and fcmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::SIToFP: - case Instruction::UIToFP: - if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) - return NV; - break; - case Instruction::Select: { - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op2 = Builder->CreateFCmp(I.getPredicate(), - LHSI->getOperand(2), RHSC, I.getName()); - } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - } - } - - if (Op1) - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - break; - } - case Instruction::Load: - if (GetElementPtrInst *GEP = - dyn_cast(LHSI->getOperand(0))) { - if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer() && - !cast(LHSI)->isVolatile()) - if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) - return Res; - } - break; - } - } - - return Changed ? &I : 0; -} - -Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { - I.swapOperands(); - Changed = true; - } - - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); - - // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::getInt1Ty(I.getContext())) { - switch (I.getPredicate()) { - default: llvm_unreachable("Invalid icmp instruction!"); - case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); - return BinaryOperator::CreateNot(Xor); - } - case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B - return BinaryOperator::CreateXor(Op0, Op1); - - case ICmpInst::ICMP_UGT: - std::swap(Op0, Op1); // Change icmp ugt -> icmp ult - // FALL THROUGH - case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); - return BinaryOperator::CreateAnd(Not, Op1); - } - case ICmpInst::ICMP_SGT: - std::swap(Op0, Op1); // Change icmp sgt -> icmp slt - // FALL THROUGH - case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); - return BinaryOperator::CreateAnd(Not, Op0); - } - case ICmpInst::ICMP_UGE: - std::swap(Op0, Op1); // Change icmp uge -> icmp ule - // FALL THROUGH - case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); - return BinaryOperator::CreateOr(Not, Op1); - } - case ICmpInst::ICMP_SGE: - std::swap(Op0, Op1); // Change icmp sge -> icmp sle - // FALL THROUGH - case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); - return BinaryOperator::CreateOr(Not, Op0); - } - } - } - - unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) - BitWidth = Ty->getScalarSizeInBits(); - - bool isSignBit = false; - - // See if we are doing a comparison with a constant. - if (ConstantInt *CI = dyn_cast(Op1)) { - Value *A = 0, *B = 0; - - // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) - if (I.isEquality() && CI->isZero() && - match(Op0, m_Sub(m_Value(A), m_Value(B)))) { - // (icmp cond A B) if cond is equality - return new ICmpInst(I.getPredicate(), A, B); - } - - // If we have an icmp le or icmp ge instruction, turn it into the - // appropriate icmp lt or icmp gt instruction. This allows us to rely on - // them being folded in the code below. The SimplifyICmpInst code has - // already handled the edge cases for us, so we just assert on them. - switch (I.getPredicate()) { - default: break; - case ICmpInst::ICMP_ULE: - assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE - return new ICmpInst(ICmpInst::ICMP_ULT, Op0, - AddOne(CI)); - case ICmpInst::ICMP_SLE: - assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - AddOne(CI)); - case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE - return new ICmpInst(ICmpInst::ICMP_UGT, Op0, - SubOne(CI)); - case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - SubOne(CI)); - } - - // If this comparison is a normal comparison, it demands all - // bits, if it is a sign bit comparison, it only demands the sign bit. - bool UnusedBit; - isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); - } - - // See if we can fold the comparison based on range information we can get - // by checking whether bits are known to be zero or one in the input. - if (BitWidth != 0) { - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); - - if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), - Op0KnownZero, Op0KnownOne, 0)) - return &I; - if (SimplifyDemandedBits(I.getOperandUse(1), - APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) - return &I; - - // Given the known and unknown bits, compute a range that the LHS could be - // in. Compute the Min, Max and RHS values based on the known bits. For the - // EQ and NE we use unsigned values. - APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); - APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); - if (I.isSigned()) { - ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } else { - ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } - - // If Min and Max are known to be the same, then SimplifyDemandedBits - // figured out that the LHS is a constant. Just constant fold this now so - // that code below can assume that Min != Max. - if (!isa(Op0) && Op0Min == Op0Max) - return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); - if (!isa(Op1) && Op1Min == Op1Max) - return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); - - // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. - switch (I.getPredicate()) { - default: llvm_unreachable("Unknown icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - case ICmpInst::ICMP_NE: - if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - break; - case ICmpInst::ICMP_ULT: - if (Op0Max.ult(Op1Min)) // A true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Min.uge(Op1Max)) // A false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - if (Op1Min == Op0Max) // A A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast(Op1)) { - if (Op1Max == Op0Min+1) // A A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - SubOne(CI)); - - // (x (x >s -1) -> true if sign bit clear - if (CI->isMinValue(true)) - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - Constant::getAllOnesValue(Op0->getType())); - } - break; - case ICmpInst::ICMP_UGT: - if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - - if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast(Op1)) { - if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - AddOne(CI)); - - // (x >u 2147483647) -> (x true if sign bit set - if (CI->isMaxValue(true)) - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Constant::getNullValue(Op0->getType())); - } - break; - case ICmpInst::ICMP_SLT: - if (Op0Max.slt(Op1Min)) // A true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Min.sge(Op1Max)) // A false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - if (Op1Min == Op0Max) // A A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast(Op1)) { - if (Op1Max == Op0Min+1) // A A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - SubOne(CI)); - } - break; - case ICmpInst::ICMP_SGT: - if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - - if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast(Op1)) { - if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - AddOne(CI)); - } - break; - case ICmpInst::ICMP_SGE: - assert(!isa(Op1) && "ICMP_SGE with ConstantInt not folded!"); - if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - case ICmpInst::ICMP_SLE: - assert(!isa(Op1) && "ICMP_SLE with ConstantInt not folded!"); - if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - case ICmpInst::ICMP_UGE: - assert(!isa(Op1) && "ICMP_UGE with ConstantInt not folded!"); - if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - case ICmpInst::ICMP_ULE: - assert(!isa(Op1) && "ICMP_ULE with ConstantInt not folded!"); - if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - break; - } - - // Turn a signed comparison into an unsigned one if both operands - // are known to have the same sign. - if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) - return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); - } - - // Test if the ICmpInst instruction is used exclusively by a select as - // part of a minimum or maximum operation. If so, refrain from doing - // any other folding. This helps out other analyses which understand - // non-obfuscated minimum and maximum idioms, such as ScalarEvolution - // and CodeGen. And in this case, at least one of the comparison - // operands has at least one user besides the compare (the select), - // which would often largely negate the benefit of folding anyway. - if (I.hasOneUse()) - if (SelectInst *SI = dyn_cast(*I.use_begin())) - if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || - (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; - - // See if we are doing a comparison between a constant and an instruction that - // can be folded into the comparison. - if (ConstantInt *CI = dyn_cast(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp - if (Instruction *LHSI = dyn_cast(Op0)) - if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) - return Res; - } - - // Handle icmp with constant (but not simple integer constant) RHS - if (Constant *RHSC = dyn_cast(Op1)) { - if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::GetElementPtr: - // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null - if (RHSC->isNullValue() && - cast(LHSI)->hasAllZeroIndices()) - return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - break; - case Instruction::PHI: - // Only fold icmp into the PHI if the phi and icmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::Select: { - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (Constant *C = dyn_cast(LHSI->getOperand(1))) - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - if (Constant *C = dyn_cast(LHSI->getOperand(2))) - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - - // We only want to perform this transformation if it will not lead to - // additional code. This is true if either both sides of the select - // fold to a constant (in which case the icmp is replaced with a select - // which will usually simplify) or this is the only user of the - // select (in which case we are trading a select+icmp for a simpler - // select+icmp). - if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { - if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC, I.getName()); - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - } - break; - } - case Instruction::Call: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (isMalloc(LHSI) && LHSI->hasOneUse() && - isa(RHSC)) { - // Need to explicitly erase malloc call here, instead of adding it to - // Worklist, because it won't get DCE'd from the Worklist since - // isInstructionTriviallyDead() returns false for function calls. - // It is OK to replace LHSI/MallocCall with Undef because the - // instruction that uses it will be erased via Worklist. - if (extractMallocCall(LHSI)) { - LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); - EraseInstFromFunction(*LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) - if (MallocCall->hasOneUse()) { - MallocCall->replaceAllUsesWith( - UndefValue::get(MallocCall->getType())); - EraseInstFromFunction(*MallocCall); - Worklist.Add(LHSI); // The malloc's bitcast use. - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - } - break; - case Instruction::IntToPtr: - // icmp pred inttoptr(X), null -> icmp pred X, 0 - if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == - LHSI->getOperand(0)->getType()) - return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - break; - - case Instruction::Load: - // Try to optimize things like "A[i] > 4" to index computations. - if (GetElementPtrInst *GEP = - dyn_cast(LHSI->getOperand(0))) { - if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer() && - !cast(LHSI)->isVolatile()) - if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) - return Res; - } - break; - } - } - - // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. - if (GEPOperator *GEP = dyn_cast(Op0)) - if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (GEPOperator *GEP = dyn_cast(Op1)) - if (Instruction *NI = FoldGEPICmp(GEP, Op0, - ICmpInst::getSwappedPredicate(I.getPredicate()), I)) - return NI; - - // Test to see if the operands of the icmp are casted versions of other - // values. If the ptr->ptr cast can be stripped off both arguments, we do so - // now. - if (BitCastInst *CI = dyn_cast(Op0)) { - if (isa(Op0->getType()) && - (isa(Op1) || isa(Op1))) { - // We keep moving the cast from the left operand over to the right - // operand, where it can often be eliminated completely. - Op0 = CI->getOperand(0); - - // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast - // so eliminate it as well. - if (BitCastInst *CI2 = dyn_cast(Op1)) - Op1 = CI2->getOperand(0); - - // If Op1 is a constant, we can fold the cast into the constant. - if (Op0->getType() != Op1->getType()) { - if (Constant *Op1C = dyn_cast(Op1)) { - Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); - } else { - // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); - } - } - return new ICmpInst(I.getPredicate(), Op0, Op1); - } - } - - if (isa(Op0)) { - // Handle the special case of: icmp (cast bool to X), - // This comes up when you have code like - // int X = A < B; - // if (X) ... - // For generality, we handle any zero-extension of any operand comparison - // with a constant or another cast from the same type. - if (isa(Op1) || isa(Op1)) - if (Instruction *R = visitICmpInstWithCastAndCast(I)) - return R; - } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - - if (ConstantInt *CI = dyn_cast(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } - } - break; - } - } - } - } - - // ~x < ~y --> y < x - { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); - } - - if (I.isEquality()) { - Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - - if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 - Value *OtherVal = A == Op1 ? B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); - } - - if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { - // A^c1 == C^c2 --> A == C^(c1^c2) - ConstantInt *C1, *C2; - if (match(B, m_ConstantInt(C1)) && - match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = ConstantInt::get(I.getContext(), - C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC, "tmp"); - return new ICmpInst(I.getPredicate(), A, Xor); - } - - // A^B == A^D -> B == D - if (A == C) return new ICmpInst(I.getPredicate(), B, D); - if (A == D) return new ICmpInst(I.getPredicate(), B, C); - if (B == C) return new ICmpInst(I.getPredicate(), A, D); - if (B == D) return new ICmpInst(I.getPredicate(), A, C); - } - } - - if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) { - // A == (A^B) -> B == 0 - Value *OtherVal = A == Op0 ? B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); - } - - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { - Value *X = 0, *Y = 0, *Z = 0; - - if (A == C) { - X = B; Y = D; Z = A; - } else if (A == D) { - X = B; Y = C; Z = A; - } else if (B == C) { - X = A; Y = D; Z = B; - } else if (B == D) { - X = A; Y = C; Z = B; - } - - if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y, "tmp"); - Op1 = Builder->CreateAnd(Op1, Z, "tmp"); - I.setOperand(0, Op1); - I.setOperand(1, Constant::getNullValue(Op1->getType())); - return &I; - } - } - } - - { - Value *X; ConstantInt *Cst; - // icmp X+Cst, X - if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); - - // icmp X, X+Cst - if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); - } - return Changed ? &I : 0; -} - -/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". -Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, - Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, - Value *TheAdd) { - // If we have X+0, exit early (simplifying logic below) and let it get folded - // elsewhere. icmp X+0, X -> icmp X, X - if (CI->isZero()) { - bool isTrue = ICmpInst::isTrueWhenEqual(Pred); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - // (X+4) == X -> false. - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - // (X+4) != X -> true. - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. - bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" - // operators. - - // (X+1) X >u (MAXUINT-1) --> X != 255 - // (X+2) X >u (MAXUINT-2) --> X > 253 - // (X+MAXUINT) X >u (MAXUINT-MAXUINT) --> X != 0 - if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI); - return new ICmpInst(ICmpInst::ICMP_UGT, X, R); - } - - // (X+1) >u X --> X X != 255 - // (X+2) >u X --> X X u X --> X X X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - - unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); - ConstantInt *SMax = ConstantInt::get(X->getContext(), - APInt::getSignedMaxValue(BitWidth)); - - // (X+ 1) X >s (MAXSINT-1) --> X == 127 - // (X+ 2) X >s (MAXSINT-2) --> X >s 125 - // (X+MAXSINT) X >s (MAXSINT-MAXSINT) --> X >s 0 - // (X+MINSINT) X >s (MAXSINT-MINSINT) --> X >s -1 - // (X+ -2) X >s (MAXSINT- -2) --> X >s 126 - // (X+ -1) X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - - // (X+ 1) >s X --> X X != 127 - // (X+ 2) >s X --> X X s X --> X X s X --> X X s X --> X X s X --> X X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); - return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); -} - -/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS -/// and CmpRHS are both known to be integer constants. -Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS) { - ConstantInt *CmpRHS = cast(ICI.getOperand(1)); - const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; - if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) - return 0; - if (DivRHS->isZero()) - return 0; // The ProdOV computation fails on divide by zero. - if (DivIsSigned && DivRHS->isAllOnesValue()) - return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. - - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. - Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); - - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; - - // Get the ICmp opcode - ICmpInst::Predicate Pred = ICI.getPredicate(); - - // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). - // Compute this interval based on the constants involved and the signedness of - // the compare/divide. This computes a half-open interval, keeping track of - // whether either value in the interval overflows. After analysis each - // overflow variable is set to 0 if it's corresponding bound variable is valid - // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. - int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = 0, *HiBound = 0; - - if (!DivIsSigned) { // udiv - // e.g. X/5 op 3 --> [15, 20) - LoBound = Prod; - HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); - } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. - if (CmpRHSV == 0) { // (X / pos) op 0 - // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; - } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos - LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) - HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); - } else { // (X / pos) op neg - // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) - HiBound = AddOne(Prod); - LoOverflow = HiOverflow = ProdOV ? -1 : 0; - if (!LoOverflow) { - ConstantInt* DivNeg = - cast(ConstantExpr::getNeg(DivRHS)); - LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; - } - } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. - if (CmpRHSV == 0) { // (X / neg) op 0 - // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) { // -INTMIN = INTMIN - HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN - } - } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos - // e.g. X/-5 op 3 --> [-19, -14) - HiBound = AddOne(Prod); - HiOverflow = LoOverflow = ProdOV ? -1 : 0; - if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; - } else { // (X / neg) op neg - LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) - LoOverflow = HiOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); - } - - // Dividing by a negative swaps the condition. LT <-> GT - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - Value *X = DivI->getOperand(0); - switch (Pred) { - default: llvm_unreachable("Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow == +1) // Low bound is greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - if (LoOverflow == -1) // Low bound is less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - return new ICmpInst(Pred, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow == +1) // High bound greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow == -1) // High bound less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - if (Pred == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } -} - - -/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". -/// -Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHSI, - ConstantInt *RHS) { - const APInt &RHSV = RHS->getValue(); - - switch (LHSI->getOpcode()) { - case Instruction::Trunc: - if (ICI.isEquality() && LHSI->hasOneUse()) { - // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all - // of the high bits truncated out of x are known. - unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), - SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - - // If all the high bits are known, we can do this xform. - if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { - // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); - NewRHS |= KnownOne; - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(), NewRHS)); - } - } - break; - - case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) - if (ConstantInt *XorCST = dyn_cast(LHSI->getOperand(1))) { - // If this is a comparison that tests the signbit (X < 0) or (x > -1), - // fold the xor. - if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || - (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { - Value *CompareVal = LHSI->getOperand(0); - - // If the sign bit of the XorCST is not set, there is no change to - // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { - ICI.setOperand(0, CompareVal); - Worklist.Add(LHSI); - return &ICI; - } - - // Was the old condition true if the operand is positive? - bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - - // If so, the new one isn't. - isTrueIfPositive ^= true; - - if (isTrueIfPositive) - return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, - SubOne(RHS)); - else - return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, - AddOne(RHS)); - } - - if (LHSI->hasOneUse()) { - // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) - if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { - const APInt &SignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(), - RHSV ^ SignBit)); - } - - // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { - const APInt &NotSignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - Pred = ICI.getSwappedPredicate(Pred); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(), - RHSV ^ NotSignBit)); - } - } - } - break; - case Instruction::And: // (icmp pred (and X, AndCST), RHS) - if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && - LHSI->getOperand(0)->hasOneUse()) { - ConstantInt *AndCST = cast(LHSI->getOperand(1)); - - // If the LHS is an AND of a truncating cast, we can widen the - // and/compare to be the input width without changing the value - // produced, eliminating a cast. - if (TruncInst *Cast = dyn_cast(LHSI->getOperand(0))) { - // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. - // Extending a relational comparison when we're checking the sign - // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); - Value *NewAnd = - Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(ICI.getContext(), NewCST), - LHSI->getName()); - return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(ICI.getContext(), NewCI)); - } - } - - // If this is: (X >> C1) & C2 != C3 (where any shift and any compare - // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This - // happens a LOT in code produced by the C front-end, for bitfield - // access. - BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); - if (Shift && !Shift->isShift()) - Shift = 0; - - ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - - // We can fold this as long as we can't shift unknown bits - // into the mask. This can only happen with signed shift - // rights, as they sign-extend. - if (ShAmt) { - bool CanFold = Shift->isLogicalShift(); - if (!CanFold) { - // To test for the bad case of the signed shr, see if any - // of the bits shifted in could be tested after the mask. - uint32_t TyBits = Ty->getPrimitiveSizeInBits(); - int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - - uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & - AndCST->getValue()) == 0) - CanFold = true; - } - - if (CanFold) { - Constant *NewCst; - if (Shift->getOpcode() == Instruction::Shl) - NewCst = ConstantExpr::getLShr(RHS, ShAmt); - else - NewCst = ConstantExpr::getShl(RHS, ShAmt); - - // Check to see if we are shifting out any of the bits being - // compared. - if (ConstantExpr::get(Shift->getOpcode(), - NewCst, ShAmt) != RHS) { - // If we shifted bits out, the fold is not going to work out. - // As a special case, check to see if this means that the - // result is always true or false now. - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, - ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, - ConstantInt::getTrue(ICI.getContext())); - } else { - ICI.setOperand(1, NewCst); - Constant *NewAndCST; - if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); - else - NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); - LHSI->setOperand(1, NewAndCST); - LHSI->setOperand(0, Shift->getOperand(0)); - Worklist.Add(Shift); // Shift is dead. - return &ICI; - } - } - } - - // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is - // preferable because it allows the C<hasOneUse() && RHSV == 0 && - ICI.isEquality() && !Shift->isArithmeticShift() && - !isa(Shift->getOperand(0))) { - // Compute C << Y. - Value *NS; - if (Shift->getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); - } else { - // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); - } - - // Compute X & (C << Y). - Value *NewAnd = - Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - - ICI.setOperand(0, NewAnd); - return &ICI; - } - } - - // Try to optimize things like "A[i]&42 == 0" to index computations. - if (LoadInst *LI = dyn_cast(LHSI->getOperand(0))) { - if (GetElementPtrInst *GEP = - dyn_cast(LI->getOperand(0))) - if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer() && - !LI->isVolatile() && isa(LHSI->getOperand(1))) { - ConstantInt *C = cast(LHSI->getOperand(1)); - if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) - return Res; - } - } - break; - - case Instruction::Or: { - if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) - break; - Value *P, *Q; - if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { - // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 - // -> and (icmp eq P, null), (icmp eq Q, null). - - Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, - Constant::getNullValue(P->getType())); - Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, - Constant::getNullValue(Q->getType())); - Instruction *Op; - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - Op = BinaryOperator::CreateAnd(ICIP, ICIQ); - else - Op = BinaryOperator::CreateOr(ICIP, ICIQ); - return Op; - } - break; - } - - case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) - ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); - if (!ShAmt) break; - - uint32_t TypeBits = RHSV.getBitWidth(); - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - if (ShAmt->uge(TypeBits)) - break; - - if (ICI.isEquality()) { - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - Constant *Comp = - ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), - ShAmt); - if (Comp != RHS) {// Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, - TypeBits-ShAmtVal)); - - Value *And = - Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(ICI.getContext(), - RHSV.lshr(ShAmtVal))); - } - } - - // Otherwise, if this is a comparison of the sign bit, simplify to and/test. - bool TrueIfSigned = false; - if (LHSI->hasOneUse() && - isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { - // (X << 31) (X&1) != 0 - Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); - Value *And = - Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); - return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, - And, Constant::getNullValue(And->getType())); - } - break; - } - - case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); - } - break; - } - - case Instruction::SDiv: - case Instruction::UDiv: - // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember - // it, otherwise compute the range [low, hi) bounding the new value. - // See: InsertRangeTest above for the kinds of replacements possible. - if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) - if (Instruction *R = FoldICmpDivCst(ICI, cast(LHSI), - DivRHS)) - return R; - break; - - case Instruction::Add: - // Fold: icmp pred (add X, C1), C2 - if (!ICI.isEquality()) { - ConstantInt *LHSC = dyn_cast(LHSI->getOperand(1)); - if (!LHSC) break; - const APInt &LHSV = LHSC->getValue(); - - ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) - .subtract(LHSV); - - if (ICI.isSigned()) { - if (CR.getLower().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(),CR.getUpper())); - } else if (CR.getUpper().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(),CR.getLower())); - } - } else { - if (CR.getLower().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(),CR.getUpper())); - } else if (CR.getUpper().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), - ConstantInt::get(ICI.getContext(),CR.getLower())); - } - } - } - break; - } - - // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. - if (ICI.isEquality()) { - bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and - // the second operand is a constant, simplify a bit. - if (BinaryOperator *BO = dyn_cast(LHSI)) { - switch (BO->getOpcode()) { - case Instruction::SRem: - // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (RHSV == 0 && isa(BO->getOperand(1)) &&BO->hasOneUse()){ - const APInt &V = cast(BO->getOperand(1))->getValue(); - if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { - Value *NewRem = - Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), - BO->getName()); - return new ICmpInst(ICI.getPredicate(), NewRem, - Constant::getNullValue(BO->getType())); - } - } - break; - case Instruction::Add: - // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. - if (ConstantInt *BOp1C = dyn_cast(BO->getOperand(1))) { - if (BO->hasOneUse()) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getSub(RHS, BOp1C)); - } else if (RHSV == 0) { - // Replace ((add A, B) != 0) with (A != -B) if A or B is - // efficiently invertible, or if the add has just this one use. - Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - - if (Value *NegVal = dyn_castNegVal(BOp1)) - return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) - return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); - Neg->takeName(BO); - return new ICmpInst(ICI.getPredicate(), BOp0, Neg); - } - } - break; - case Instruction::Xor: - // For the xor case, we can xor two constants together, eliminating - // the explicit xor. - if (Constant *BOC = dyn_cast(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - BO->getOperand(1)); - break; - - case Instruction::Or: - // If bits are being or'd in that are not present in the constant we - // are comparing against, then the comparison could never succeed! - if (Constant *BOC = dyn_cast(BO->getOperand(1))) { - Constant *NotCI = ConstantExpr::getNot(RHS); - if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) - return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - isICMP_NE)); - } - break; - - case Instruction::And: - if (ConstantInt *BOC = dyn_cast(BO->getOperand(1))) { - // If bits are being compared against that are and'd out, then the - // comparison can never succeed! - if ((RHSV & ~BOC->getValue()) != 0) - return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - isICMP_NE)); - - // If we have ((X & C) == C), turn it into ((X & C) != 0). - if (RHS == BOC && RHSV.isPowerOf2()) - return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : - ICmpInst::ICMP_NE, LHSI, - Constant::getNullValue(RHS->getType())); - - // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->getValue().isSignBit()) { - Value *X = BO->getOperand(0); - Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; - return new ICmpInst(pred, X, Zero); - } - - // ((X & ~7) == 0) --> X < 8 - if (RHSV == 0 && isHighOnes(BOC)) { - Value *X = BO->getOperand(0); - Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - return new ICmpInst(pred, X, NegX); - } - } - default: break; - } - } else if (IntrinsicInst *II = dyn_cast(LHSI)) { - // Handle icmp {eq|ne} , intcst. - if (II->getIntrinsicID() == Intrinsic::bswap) { - Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); - ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap())); - return &ICI; - } - } - } - return 0; -} - -/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). -/// We only handle extending casts so far. -/// -Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { - const CastInst *LHSCI = cast(ICI.getOperand(0)); - Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); - Value *RHSCIOp; - - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the - // integer type is the same size as the pointer type. - if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == - cast(DestTy)->getBitWidth()) { - Value *RHSOp = 0; - if (Constant *RHSC = dyn_cast(ICI.getOperand(1))) { - RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); - } else if (PtrToIntInst *RHSC = dyn_cast(ICI.getOperand(1))) { - RHSOp = RHSC->getOperand(0); - // If the pointer types don't match, insert a bitcast. - if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); - } - - if (RHSOp) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); - } - - // The code below only handles extension cast instructions, so far. - // Enforce this. - if (LHSCI->getOpcode() != Instruction::ZExt && - LHSCI->getOpcode() != Instruction::SExt) - return 0; - - bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; - bool isSignedCmp = ICI.isSigned(); - - if (CastInst *CI = dyn_cast(ICI.getOperand(1))) { - // Not an extension from the same type? - RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) - return 0; - - // If the signedness of the two casts doesn't agree (i.e. one is a sext - // and the other is a zext), then we can't handle this. - if (CI->getOpcode() != LHSCI->getOpcode()) - return 0; - - // Deal with equality cases early. - if (ICI.isEquality()) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); - - // A signed comparison of sign extended values simplifies into a - // signed comparison. - if (isSignedCmp && isSignedExt) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); - - // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); - } - - // If we aren't dealing with a constant on the RHS, exit early - ConstantInt *CI = dyn_cast(ICI.getOperand(1)); - if (!CI) - return 0; - - // Compute the constant that would happen if we truncated to SrcTy then - // reextended to DestTy. - Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); - Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), - Res1, DestTy); - - // If the re-extended constant didn't change... - if (Res2 == CI) { - // Deal with equality cases early. - if (ICI.isEquality()) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - - // A signed comparison of sign extended values simplifies into a - // signed comparison. - if (isSignedExt && isSignedCmp) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - - // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); - } - - // The re-extended constant changed so the constant cannot be represented - // in the shorter type. Consequently, we cannot emit a simple comparison. - - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - - // Evaluate the comparison for LT (we invert for GT below). LE and GE cases - // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false - else - Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); - } else { - // Unsigned extend & unsigned compare -> always true. - Result = ConstantInt::getTrue(ICI.getContext()); - } - } - - // Finally, return the value computed. - if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) - return ReplaceInstUsesWith(ICI, Result); - - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); - return BinaryOperator::CreateNot(Result); -} - -Instruction *InstCombiner::visitShl(BinaryOperator &I) { - return commonShiftTransforms(I); -} - -Instruction *InstCombiner::visitLShr(BinaryOperator &I) { - return commonShiftTransforms(I); -} - -Instruction *InstCombiner::visitAShr(BinaryOperator &I) { - if (Instruction *R = commonShiftTransforms(I)) - return R; - - Value *Op0 = I.getOperand(0); - - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (ConstantInt *CSI = dyn_cast(Op0)) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - - // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, - APInt::getSignBit(I.getType()->getScalarSizeInBits()))) - return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); - - // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0); - if (NumSignBits == Op0->getType()->getScalarSizeInBits()) - return ReplaceInstUsesWith(I, Op0); - - return 0; -} - -Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { - assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // shl X, 0 == X and shr X, 0 == X - // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Constant::getNullValue(Op1->getType()) || - Op0 == Constant::getNullValue(Op0->getType())) - return ReplaceInstUsesWith(I, Op0); - - if (isa(Op0)) { - if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef - return ReplaceInstUsesWith(I, Op0); - else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa(Op1)) { - if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X - return ReplaceInstUsesWith(I, Op0); - else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // See if we can fold away this shift. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - // Try to fold constant and into select arguments. - if (isa(Op0)) - if (SelectInst *SI = dyn_cast(Op1)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (ConstantInt *CUI = dyn_cast(Op1)) - if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) - return Res; + // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. + if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, B); + if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, B); + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. + if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, D); + if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, D); return 0; } -Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, - BinaryOperator &I) { - bool isLeftShift = I.getOpcode() == Instruction::Shl; - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - - // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate - // a signed shift. - // - if (Op1->uge(TypeBits)) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - else { - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } - } - - // ((X*C1) << C2) == (X * (C1 << C2)) - if (BinaryOperator *BO = dyn_cast(Op0)) - if (BO->getOpcode() == Instruction::Mul && isLeftShift) - if (Constant *BOOp = dyn_cast(BO->getOperand(1))) - return BinaryOperator::CreateMul(BO->getOperand(0), - ConstantExpr::getShl(BOOp, Op1)); - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - - // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) - if (TruncInst *TI = dyn_cast(Op0)) { - Instruction *TrOp = dyn_cast(TI->getOperand(0)); - // If 'shift2' is an ashr, we would have to get the sign bit into a funny - // place. Don't try to do this transformation in this case. Also, we - // require that the input operand is a shift-by-constant so that we have - // confidence that the shifts will get folded together. We could do this - // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && - isa(TrOp->getOperand(1))) { - // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); - // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); - - // For logical shifts, the truncation has the effect of making the high - // part of the register be zeros. Emulate this by inserting an AND to - // clear the top bits as needed. This 'and' will usually be zapped by - // other xforms later if dead. - unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); - unsigned DstSize = TI->getType()->getScalarSizeInBits(); - APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); - - // The mask we constructed says what the trunc would do if occurring - // between the shifts. We want to know the effect *after* the second - // shift. We know that it is a logical shift by a constant, so adjust the - // mask as appropriate. - if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1->getZExtValue(); - else { - assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1->getZExtValue()); - } - - // shift1 & 0x00FF - Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV), - TI->getName()); - - // Return the value truncated to the interesting size. - return new TruncInst(And, I.getType()); - } - } - - if (Op0->hasOneUse()) { - if (BinaryOperator *Op0BO = dyn_cast(Op0)) { - // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) - Value *V1, *V2; - ConstantInt *CC; - switch (Op0BO->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // These operators commute. - // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) - if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && - match(Op0BO->getOperand(1), m_Shr(m_Value(V1), - m_Specific(Op1)))) { - Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); - // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); - } - - // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) - Value *Op0BOOp1 = Op0BO->getOperand(1); - if (isLeftShift && Op0BOOp1->hasOneUse() && - match(Op0BOOp1, - m_And(m_Shr(m_Value(V1), m_Specific(Op1)), - m_ConstantInt(CC))) && - cast(Op0BOOp1)->getOperand(0)->hasOneUse()) { - Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, - Op0BO->getName()); - // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); - return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); - } - } - - // FALL THROUGH. - case Instruction::Sub: { - // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) - if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && - match(Op0BO->getOperand(0), m_Shr(m_Value(V1), - m_Specific(Op1)))) { - Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); - // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); - } - - // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) - if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && - match(Op0BO->getOperand(0), - m_And(m_Shr(m_Value(V1), m_Value(V2)), - m_ConstantInt(CC))) && V2 == Op1 && - cast(Op0BO->getOperand(0)) - ->getOperand(0)->hasOneUse()) { - Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); - // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); - - return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); - } - - break; - } - } - - - // If the operand is an bitwise operator with a constant RHS, and the - // shift is the only use, we can pull it out of the shift. - if (ConstantInt *Op0C = dyn_cast(Op0BO->getOperand(1))) { - bool isValid = true; // Valid only for And, Or, Xor - bool highBitSet = false; // Transform if high bit of constant set? - - switch (Op0BO->getOpcode()) { - default: isValid = false; break; // Do not perform transform! - case Instruction::Add: - isValid = isLeftShift; - break; - case Instruction::Or: - case Instruction::Xor: - highBitSet = false; - break; - case Instruction::And: - highBitSet = true; - break; - } - - // If this is a signed shift right, and the high bit is modified - // by the logical operation, do not perform the transformation. - // The highBitSet boolean indicates the value of the high bit of - // the constant which would cause it to be modified for this - // operation. - // - if (isValid && I.getOpcode() == Instruction::AShr) - isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - - if (isValid) { - Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - - Value *NewShift = - Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); - NewShift->takeName(Op0BO); - - return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, - NewRHS); - } - } - } +/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. +Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + Value *Val, *Val2; + ConstantInt *LHSCst, *RHSCst; + ICmpInst::Predicate LHSCC, RHSCC; + + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + return 0; + + + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCst == RHSCst && LHSCC == RHSCC && + LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); } - // Find out if this is a shift of a shift by a constant. - BinaryOperator *ShiftOp = dyn_cast(Op0); - if (ShiftOp && !ShiftOp->isShift()) - ShiftOp = 0; + // From here on, we only handle: + // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; - if (ShiftOp && isa(ShiftOp->getOperand(1))) { - ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); - uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); - uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); - assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); - if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. - Value *X = ShiftOp->getOperand(0); - - uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - - const IntegerType *Ty = cast(I.getType()); - - // Check for (X << c1) << c2 and (X >> c1) >> c2 - if (I.getOpcode() == ShiftOp->getOpcode()) { - // If this is oversized composite shift, then unsigned shifts get 0, ashr - // saturates. - if (AmtSum >= TypeBits) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) | (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and or'ing the result + // together. Because of the above check, we know that we only have + // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the + // FoldICmpLogical check above), that the two constants are not + // equal. + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } - - return BinaryOperator::Create(I.getOpcode(), X, - ConstantInt::get(Ty, AmtSum)); + break; // (X == 13 | X == 15) -> no change + case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change + case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change + break; + case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 + case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 + return ReplaceInstUsesWith(I, RHS); } - - if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { - if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + break; + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 + case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 + case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true + case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); } - - if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { - // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. - if (AmtSum >= TypeBits) - AmtSum = TypeBits-1; - - Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(), Mask)); + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(false)) + return ReplaceInstUsesWith(I, LHS); + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), + false, false, I); + case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change + break; } - - // Okay, if we get here, one shift must be left, and the other shift must be - // right. See if the amounts are equal. - if (ShiftAmt1 == ShiftAmt2) { - // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl) { - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, - ConstantInt::get(I.getContext(),Mask)); - } - // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). - if (I.getOpcode() == Instruction::LShr) { - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, - ConstantInt::get(I.getContext(), Mask)); - } - // We can simplify ((X << C) >>s C) into a trunc + sext. - // NOTE: we could do this for any C, but that would make 'unusual' integer - // types. For now, just stick to ones well-supported by the code - // generators. - const Type *SExtType = 0; - switch (Ty->getBitWidth() - ShiftAmt1) { - case 1 : - case 8 : - case 16 : - case 32 : - case 64 : - case 128: - SExtType = IntegerType::get(I.getContext(), - Ty->getBitWidth() - ShiftAmt1); - break; - default: break; - } - if (SExtType) - return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); - // Otherwise, we can't handle it yet. - } else if (ShiftAmt1 < ShiftAmt2) { - uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; - - // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); - } - - // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); - } - - // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. - } else { - assert(ShiftAmt2 < ShiftAmt1); - uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(true)) + return ReplaceInstUsesWith(I, LHS); + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), + true, false, I); + case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 + case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 + case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true + case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change + break; + } + break; + } + return 0; +} - // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, - ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); - } - - // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); +Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); + // Otherwise, no need to compare the two constants, compare the + // rest. + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); } - - // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp uno x,x" is "fcmp uno x, 0". + if (isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, + Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, LHS); + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op0Ordered == Op1Ordered) { + // If both are ordered or unordered, return a new fcmp with + // or'ed predicates. + Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value... + return ReplaceInstUsesWith(I, RV); } } return 0; } - -/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear -/// expression. If so, decompose it, returning some value X, such that Val is -/// X*Scale+Offset. +/// FoldOrWithConstants - This helper function folds: /// -static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset) { - assert(Val->getType() == Type::getInt32Ty(Val->getContext()) && - "Unexpected allocation size type!"); - if (ConstantInt *CI = dyn_cast(Val)) { - Offset = CI->getZExtValue(); - Scale = 0; - return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); - } else if (BinaryOperator *I = dyn_cast(Val)) { - if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { - if (I->getOpcode() == Instruction::Shl) { - // This is a value scaled by '1 << the shift amt'. - Scale = 1U << RHS->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Mul) { - // This value is scaled by 'RHS'. - Scale = RHS->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, - // where C1 is divisible by C2. - unsigned SubScale; - Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); - Offset += RHS->getZExtValue(); - Scale = SubScale; - return SubVal; - } - } +/// ((A | B) & C1) | (B & C2) +/// +/// into: +/// +/// (A & C1) | B +/// +/// when the XOR of the two constants is "all ones" (-1). +Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C) { + ConstantInt *CI1 = dyn_cast(C); + if (!CI1) return 0; + + Value *V1 = 0; + ConstantInt *CI2 = 0; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + + APInt Xor = CI1->getValue() ^ CI2->getValue(); + if (!Xor.isAllOnesValue()) return 0; + + if (V1 == A || V1 == B) { + Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + return BinaryOperator::CreateOr(NewOp, V1); } - // Otherwise, we can't look past this. - Scale = 1; - Offset = 0; - return Val; + return 0; } +Instruction *InstCombiner::visitOr(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, -/// try to eliminate the cast by moving the type information into the alloc. -Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, - AllocaInst &AI) { - const PointerType *PTy = cast(CI.getType()); - - BuilderTy AllocaBuilder(*Builder); - AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + if (Value *V = SimplifyOrInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Remove any uses of AI that are dead. - assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); - for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) { - Instruction *User = cast(*UI++); - if (isInstructionTriviallyDead(User)) { - while (UI != E && *UI == User) - ++UI; // If this instruction uses AI more than once, don't break UI. - - ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *User << '\n'); - EraseInstFromFunction(*User); + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast(Op1)) { + ConstantInt *C1 = 0; Value *X = 0; + // (X & C1) | C2 --> (X | C2) & (C1|C2) + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateAnd(Or, + ConstantInt::get(I.getContext(), + RHS->getValue() | C1->getValue())); } - } - // This requires TargetData to get the alloca alignment and size information. - if (!TD) return 0; + // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateXor(Or, + ConstantInt::get(I.getContext(), + C1->getValue() & ~RHS->getValue())); + } - // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; - - unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); - unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; - - // If the allocation has multiple uses, only promote it if we are strictly - // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && - CastElTyAlign == AllocElTyAlign) return 0; - - uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); - uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; - - // See if we can satisfy the modulus by pulling a scale out of the array - // size argument. - unsigned ArraySizeScale; - int ArrayOffset; - Value *NumElements = // See if the array size is a decomposable linear expr. - DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - - // If we can now satisfy the modulus, by using a non-1 scale, we really can - // do the xform. - if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; - - unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; - if (Scale == 1) { - Amt = NumElements; - } else { - Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); - // Insert before the alloca, not before the cast. - Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; } - - if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), - Offset, true); - Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); + + Value *A = 0, *B = 0; + ConstantInt *C1 = 0, *C2 = 0; + + // (A | B) | C and A | (B | C) -> bswap if possible. + // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. + if (match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())) || + (match(Op0, m_Shift(m_Value(), m_Value())) && + match(Op1, m_Shift(m_Value(), m_Value())))) { + if (Instruction *BSwap = MatchBSwap(I)) + return BSwap; } - AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); - New->setAlignment(AI.getAlignment()); - New->takeName(&AI); - - // If the allocation has one real use plus a dbg.declare, just remove the - // declare. - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { - EraseInstFromFunction(*DI); - } - // If the allocation has multiple real uses, insert a cast and change all - // things that used it to use the new cast. This will also hack on CI, but it - // will die soon. - else if (!AI.hasOneUse()) { - // New is the allocation instruction, pointer typed. AI is the original - // allocation instruction, also pointer typed. Thus, cast to use is BitCast. - Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); - AI.replaceAllUsesWith(NewCast); + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op1, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op1); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); } - return ReplaceInstUsesWith(CI, New); -} -/// CanEvaluateInDifferentType - Return true if we can take the specified value -/// and return it as type Ty without inserting any new casts and without -/// changing the computed value. This is used by code that tries to decide -/// whether promoting or shrinking integer operations to wider or smaller types -/// will allow us to eliminate a truncate or extend. -/// -/// This is a truncation operation if Ty is smaller than V->getType(), or an -/// extension operation if Ty is larger. -/// -/// If CastOpc is a truncation, then Ty will be a type smaller than V. We -/// should return true if trunc(V) can be computed by computing V in the smaller -/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as -/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be -/// efficiently truncated. -/// -/// If CastOpc is a sext or zext, we are asking if the low bits of the value can -/// bit computed in a larger type, which is then and'd or sext_in_reg'd to get -/// the final result. -bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, - unsigned CastOpc, - int &NumCastsRemoved){ - // We can always evaluate constants in another type. - if (isa(V)) - return true; - - Instruction *I = dyn_cast(V); - if (!I) return false; - - const Type *OrigTy = V->getType(); - - // If this is an extension or truncate, we can often eliminate it. - if (isa(I) || isa(I) || isa(I)) { - // If this is a cast from the destination type, we can trivially eliminate - // it, and this will remove a cast overall. - if (I->getOperand(0)->getType() == Ty) { - // If the first operand is itself a cast, and is eliminable, do not count - // this as an eliminable cast. We would prefer to eliminate those two - // casts first. - if (!isa(I->getOperand(0)) && I->hasOneUse()) - ++NumCastsRemoved; - return true; - } + // Y|(X^C) -> (X|Y)^C iff Y&C == 0 + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op0, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op0); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); } - // We can't extend or shrink something that has multiple uses: doing so would - // require duplicating the instruction in general, which isn't profitable. - if (!I->hasOneUse()) return false; - - unsigned Opc = I->getOpcode(); - switch (Opc) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // These operators can all arbitrarily be extended or truncated. - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved) && - CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, - NumCastsRemoved); - - case Instruction::UDiv: - case Instruction::URem: { - // UDiv and URem can be truncated if all the truncated bits are zero. - uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigBitWidth) { - APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); - if (MaskedValueIsZero(I->getOperand(0), Mask) && - MaskedValueIsZero(I->getOperand(1), Mask)) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved) && - CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, - NumCastsRemoved); + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0, *V3 = 0; + C1 = dyn_cast(C); + C2 = dyn_cast(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue()+1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return ReplaceInstUsesWith(I, B); + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return ReplaceInstUsesWith(I, B); + } + } + + // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) + // iff (C1&C2) == 0 and (N&~C1) == 0 + if ((C1->getValue() & C2->getValue()) == 0) { + if (match(A, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) + (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(A, + ConstantInt::get(A->getContext(), + C1->getValue()|C2->getValue())); + // Or commutes, try both ways. + if (match(B, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) + (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(B, + ConstantInt::get(B->getContext(), + C1->getValue()|C2->getValue())); } } - break; - } - case Instruction::Shl: - // If we are truncating the result of this SHL, and if it's a shift of a - // constant amount, we can always perform a SHL in a smaller type. - if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigTy->getScalarSizeInBits() && - CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved); - } - break; - case Instruction::LShr: - // If this is a truncate of a logical shr, we can truncate it to a smaller - // lshr iff we know that the bits we would otherwise be shifting in are - // already zeros. - if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { - uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigBitWidth && - MaskedValueIsZero(I->getOperand(0), - APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && - CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved); + + // Check to see if we have any common things being and'ed. If so, find the + // terms for V1 & (V2|V3). + if (isOnlyUse(Op0) || isOnlyUse(Op1)) { + V1 = 0; + if (A == B) // (A & C)|(A & D) == A & (C|D) + V1 = A, V2 = C, V3 = D; + else if (A == D) // (A & C)|(B & A) == A & (B|C) + V1 = A, V2 = B, V3 = C; + else if (C == B) // (A & C)|(C & D) == C & (A|D) + V1 = C, V2 = A, V3 = D; + else if (C == D) // (A & C)|(B & C) == C & (A|B) + V1 = C, V2 = A, V3 = B; + + if (V1) { + Value *Or = Builder->CreateOr(V2, V3, "tmp"); + return BinaryOperator::CreateAnd(V1, Or); } } - break; - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::Trunc: - // If this is the same kind of case as our original (e.g. zext+zext), we - // can safely replace it. Note that replacing it does not reduce the number - // of casts in the input. - if (Opc == CastOpc) - return true; - // sext (zext ty1), ty2 -> zext ty2 - if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt) - return true; - break; - case Instruction::Select: { - SelectInst *SI = cast(I); - return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc, - NumCastsRemoved) && - CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc, - NumCastsRemoved); - } - case Instruction::PHI: { - // We can change a phi if we can change all operands. - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc, - NumCastsRemoved)) - return false; - return true; - } - default: - // TODO: Can handle more cases here. - break; + // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + return Match; + + // ((A&~B)|(~A&B)) -> A^B + if ((match(C, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, D); + // ((~B&A)|(~A&B)) -> A^B + if ((match(A, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, D); + // ((A&~B)|(B&~A)) -> A^B + if ((match(C, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, B); + // ((~B&A)|(B&~A)) -> A^B + if ((match(A, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, B); } - return false; -} - -/// EvaluateInDifferentType - Given an expression that -/// CanEvaluateInDifferentType returns true for, actually insert the code to -/// evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, - bool isSigned) { - if (Constant *C = dyn_cast(V)) - return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); - - // Otherwise, it must be an instruction. - Instruction *I = cast(V); - Instruction *Res = 0; - unsigned Opc = I->getOpcode(); - switch (Opc) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::AShr: - case Instruction::LShr: - case Instruction::Shl: - case Instruction::UDiv: - case Instruction::URem: { - Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); - Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); - Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // If the source type of the cast is the type we're trying for then we can - // just return the source. There's no need to insert it because it is not - // new. - if (I->getOperand(0)->getType() == Ty) - return I->getOperand(0); - - // Otherwise, must be the same type of cast, so just reinsert a new one. - Res = CastInst::Create(cast(I)->getOpcode(), I->getOperand(0),Ty); - break; - case Instruction::Select: { - Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); - Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); - Res = SelectInst::Create(I->getOperand(0), True, False); - break; + // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast(Op1)) { + if (BinaryOperator *SI0 = dyn_cast(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } } - case Instruction::PHI: { - PHINode *OPN = cast(I); - PHINode *NPN = PHINode::Create(Ty); - for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { - Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); - NPN->addIncoming(V, OPN->getIncomingBlock(i)); - } - Res = NPN; - break; + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || + match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); + if (Ret) return Ret; } - default: - // TODO: Can handle more cases here. - llvm_unreachable("Unreachable!"); - break; + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || + match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); + if (Ret) return Ret; } - - Res->takeName(I); - return InsertNewInstBefore(Res, *I); -} - -/// @brief Implement the transforms common to all CastInst visitors. -Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { - Value *Src = CI.getOperand(0); - // Many cases of "cast of a cast" are eliminable. If it's eliminable we just - // eliminate it now. - if (CastInst *CSrc = dyn_cast(Src)) { // A->B->C cast - if (Instruction::CastOps opc = - isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { - // The first cast (CSrc) is eliminable so we need to fix up or replace - // the second cast (CI). CSrc will then have a good chance of being dead. - return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); - } - } + // (~A | ~B) == (~(A & B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(And); + } - // If we are casting a select then fold the cast into the select - if (SelectInst *SI = dyn_cast(Src)) - if (Instruction *NV = FoldOpIntoSelect(CI, SI, this)) - return NV; + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) + if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) { + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) + return R; - // If we are casting a PHI then fold the cast into the PHI - if (isa(Src)) { - // We don't do this if this would create a PHI node with an illegal type if - // it is currently legal. - if (!isa(Src->getType()) || - !isa(CI.getType()) || - ShouldChangeType(CI.getType(), Src->getType(), TD)) - if (Instruction *NV = FoldOpIntoPhi(CI)) - return NV; + if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) + if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) + return Res; } - - return 0; -} - -/// FindElementAtOffset - Given a type and a constant offset, determine whether -/// or not there is a sequence of GEP indices into the type that will land us at -/// the specified offset. If so, fill them into NewIndices and return the -/// resultant element type, otherwise return null. -static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, - SmallVectorImpl &NewIndices, - const TargetData *TD) { - if (!TD) return 0; - if (!Ty->isSized()) return 0; - - // Start with the index over the outer type. Note that the type size - // might be zero (even if the offset isn't zero) if the indexed type - // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); - int64_t FirstIdx = 0; - if (int64_t TySize = TD->getTypeAllocSize(Ty)) { - FirstIdx = Offset/TySize; - Offset -= FirstIdx*TySize; - // Handle hosts where % returns negative instead of values [0..TySize). - if (Offset < 0) { - --FirstIdx; - Offset += TySize; - assert(Offset >= 0); - } - assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); + // fold (or (cast A), (cast B)) -> (cast (or A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) { + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? + if (!isa(Op0C->getOperand(0)) || + !isa(Op1C->getOperand(0))) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && + // Only do this if the casts both really cause code to be + // generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + } } - NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); - - // Index into the types. If we fail, set OrigBase to null. - while (Offset) { - // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) - return 0; - if (const StructType *STy = dyn_cast(Ty)) { - const StructLayout *SL = TD->getStructLayout(STy); - assert(Offset < (int64_t)SL->getSizeInBytes() && - "Offset must stay within the indexed type"); - - unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - Elt)); - - Offset -= SL->getElementOffset(Elt); - Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast(Ty)) { - uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); - assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); - Offset %= EltSize; - Ty = AT->getElementType(); - } else { - // Otherwise, we can't index into the middle of this atomic type, bail. - return 0; - } + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; } - - return Ty; + + return Changed ? &I : 0; } -/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) -Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { - Value *Src = CI.getOperand(0); - - if (GetElementPtrInst *GEP = dyn_cast(Src)) { - // If casting the result of a getelementptr instruction with no offset, turn - // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { - // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another - // pointer operand so the opcode doesn't need to change. - Worklist.Add(GEP); - CI.setOperand(0, GEP->getOperand(0)); - return &CI; - } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - if (TD && GEP->hasOneUse() && isa(GEP->getOperand(0))) { - if (GEP->hasAllConstantIndices()) { - // We are guaranteed to get a constant from EmitGEPOffset. - ConstantInt *OffsetV = cast(EmitGEPOffset(GEP, *this)); - int64_t Offset = OffsetV->getSExtValue(); - - // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); - const Type *GEPIdxTy = - cast(OrigBase->getType())->getElementType(); - SmallVector NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) { - // If we were able to index down into an element, create the GEP - // and bitcast the result. This eliminates one bitcast, potentially - // two. - Value *NGEP = cast(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, - NewIndices.begin(), NewIndices.end()) : - Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); - NGEP->takeName(GEP); - - if (isa(CI)) - return new BitCastInst(NGEP, CI.getType()); - assert(isa(CI)); - return new PtrToIntInst(NGEP, CI.getType()); - } - } - } +namespace { + +// XorSelf - Implements: X ^ X --> 0 +struct XorSelf { + Value *RHS; + XorSelf(Value *rhs) : RHS(rhs) {} + bool shouldApply(Value *LHS) const { return LHS == RHS; } + Instruction *apply(BinaryOperator &Xor) const { + return &Xor; } - - return commonCastTransforms(CI); +}; + } -/// commonIntCastTransforms - This function implements the common transforms -/// for trunc, zext, and sext. -Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { - if (Instruction *Result = commonCastTransforms(CI)) - return Result; +Instruction *InstCombiner::visitXor(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); - uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); - uint32_t DestBitSize = DestTy->getScalarSizeInBits(); + if (isa(Op1)) { + if (isa(Op0)) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef + } - // See if we can simplify any instructions used by the LHS whose sole + // xor X, X = 0, even if X is nested in a sequence of Xor's. + if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { + assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(CI)) - return &CI; - - // If the source isn't an instruction or has more than one use then we - // can't do anything more. - Instruction *SrcI = dyn_cast(Src); - if (!SrcI || !Src->hasOneUse()) - return 0; + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa(I.getType())) + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - // Attempt to propagate the cast into the instruction for int->int casts. - int NumCastsRemoved = 0; - // Only do this if the dest type is a simple type, don't convert the - // expression tree to something weird like i93 unless the source is also - // strange. - if ((isa(DestTy) || - ShouldChangeType(SrcI->getType(), DestTy, TD)) && - CanEvaluateInDifferentType(SrcI, DestTy, - CI.getOpcode(), NumCastsRemoved)) { - // If this cast is a truncate, evaluting in a different type always - // eliminates the cast, so it is always a win. If this is a zero-extension, - // we need to do an AND to maintain the clear top-part of the computation, - // so we require that the input have eliminated at least one cast. If this - // is a sign extension, we insert two new casts (to do the extension) so we - // require that two casts have been eliminated. - bool DoXForm = false; - bool JustReplace = false; - switch (CI.getOpcode()) { - default: - // All the others use floating point so we shouldn't actually - // get here because of the check above. - llvm_unreachable("Unknown cast type"); - case Instruction::Trunc: - DoXForm = true; - break; - case Instruction::ZExt: { - DoXForm = NumCastsRemoved >= 1; - - if (!DoXForm && 0) { - // If it's unnecessary to issue an AND to clear the high bits, it's - // always profitable to do this xform. - Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false); - APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); - if (MaskedValueIsZero(TryRes, Mask)) - return ReplaceInstUsesWith(CI, TryRes); - - if (Instruction *TryI = dyn_cast(TryRes)) - if (TryI->use_empty()) - EraseInstFromFunction(*TryI); - } - break; - } - case Instruction::SExt: { - DoXForm = NumCastsRemoved >= 2; - if (!DoXForm && !isa(SrcI) && 0) { - // If we do not have to emit the truncate + sext pair, then it's always - // profitable to do this xform. - // - // It's not safe to eliminate the trunc + sext pair if one of the - // eliminated cast is a truncate. e.g. - // t2 = trunc i32 t1 to i16 - // t3 = sext i16 t2 to i32 - // != - // i32 t1 - Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true); - unsigned NumSignBits = ComputeNumSignBits(TryRes); - if (NumSignBits > (DestBitSize - SrcBitSize)) - return ReplaceInstUsesWith(CI, TryRes); + // Is this a ~ operation? + if (Value *NotOp = dyn_castNotVal(&I)) { + if (BinaryOperator *Op0I = dyn_cast(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(Op0I->getOperand(1))) + Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); + } - if (Instruction *TryI = dyn_cast(TryRes)) - if (TryI->use_empty()) - EraseInstFromFunction(*TryI); - } - break; - } - } - - if (DoXForm) { - DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " << CI); - Value *Res = EvaluateInDifferentType(SrcI, DestTy, - CI.getOpcode() == Instruction::SExt); - if (JustReplace) - // Just replace this cast with the result. - return ReplaceInstUsesWith(CI, Res); - - assert(Res->getType() == DestTy); - switch (CI.getOpcode()) { - default: llvm_unreachable("Unknown cast type!"); - case Instruction::Trunc: - // Just replace this cast with the result. - return ReplaceInstUsesWith(CI, Res); - case Instruction::ZExt: { - assert(SrcBitSize < DestBitSize && "Not a zext?"); - - // If the high bits are already zero, just replace this cast with the - // result. - APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); - if (MaskedValueIsZero(Res, Mask)) - return ReplaceInstUsesWith(CI, Res); - - // We need to emit an AND to clear the high bits. - Constant *C = ConstantInt::get(CI.getContext(), - APInt::getLowBitsSet(DestBitSize, SrcBitSize)); - return BinaryOperator::CreateAnd(Res, C); - } - case Instruction::SExt: { - // If the high bits are already filled with sign bit, just replace this - // cast with the result. - unsigned NumSignBits = ComputeNumSignBits(Res); - if (NumSignBits > (DestBitSize - SrcBitSize)) - return ReplaceInstUsesWith(CI, Res); - - // We need to emit a cast to truncate, then a cast to sext. - return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy); - } + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (isFreeToInvert(Op0I->getOperand(0)) && + isFreeToInvert(Op0I->getOperand(1))) { + Value *NotX = + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } } } } - Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0; - Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0; - - switch (SrcI->getOpcode()) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // If we are discarding information, rewrite. - if (DestBitSize < SrcBitSize && DestBitSize != 1) { - // Don't insert two casts unless at least one can be eliminated. - if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || - !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); - Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); - return BinaryOperator::Create( - cast(SrcI)->getOpcode(), Op0c, Op1c); - } - } - - // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 - if (isa(CI) && SrcBitSize == 1 && - SrcI->getOpcode() == Instruction::Xor && - Op1 == ConstantInt::getTrue(CI.getContext()) && - (!Op0->hasOneUse() || !isa(Op0))) { - Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName()); - return BinaryOperator::CreateXor(New, - ConstantInt::get(CI.getType(), 1)); - } - break; - - case Instruction::Shl: { - // Canonicalize trunc inside shl, if we can. - ConstantInt *CI = dyn_cast(Op1); - if (CI && DestBitSize < SrcBitSize && - CI->getLimitedValue(DestBitSize) < DestBitSize) { - Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); - Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); - return BinaryOperator::CreateShl(Op0c, Op1c); - } - break; - } - } - return 0; -} + + if (ConstantInt *RHS = dyn_cast(Op1)) { + if (RHS->isOne() && Op0->hasOneUse()) { + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (ICmpInst *ICI = dyn_cast(Op0)) + return new ICmpInst(ICI->getInversePredicate(), + ICI->getOperand(0), ICI->getOperand(1)); -Instruction *InstCombiner::visitTrunc(TruncInst &CI) { - if (Instruction *Result = commonIntCastTransforms(CI)) - return Result; - - Value *Src = CI.getOperand(0); - const Type *Ty = CI.getType(); - uint32_t DestBitWidth = Ty->getScalarSizeInBits(); - uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); - - // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1) { - Constant *One = ConstantInt::get(Src->getType(), 1); - Src = Builder->CreateAnd(Src, One, "tmp"); - Value *Zero = Constant::getNullValue(Src->getType()); - return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); - } - - // Optimize trunc(lshr(), c) to pull the shift through the truncate. - ConstantInt *ShAmtV = 0; - Value *ShiftOp = 0; - if (Src->hasOneUse() && - match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) { - uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth); - - // Get a mask for the bits shifting in. - APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); - if (MaskedValueIsZero(ShiftOp, Mask)) { - if (ShAmt >= DestBitWidth) // All zeros. - return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty)); - - // Okay, we can shrink this. Truncate the input, then return a new - // shift. - Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); - Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); - return BinaryOperator::CreateLShr(V1, V2); + if (FCmpInst *FCI = dyn_cast(Op0)) + return new FCmpInst(FCI->getInversePredicate(), + FCI->getOperand(0), FCI->getOperand(1)); } - } - - return 0; -} -/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. -Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, - bool DoXform) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - if (ConstantInt *Op1C = dyn_cast(ICI->getOperand(1))) { - const APInt &Op1CV = Op1C->getValue(); - - // zext (x x>>u31 true if signbit set. - // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. - if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || - (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { - if (!DoXform) return ICI; - - Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); - - if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { - Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName()+".not"); + // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). + if (CastInst *Op0C = dyn_cast(Op0)) { + if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { + if (CI->hasOneUse() && Op0C->hasOneUse()) { + Instruction::CastOps Opcode = Op0C->getOpcode(); + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + (RHS == ConstantExpr::getCast(Opcode, + ConstantInt::getTrue(I.getContext()), + Op0C->getDestTy()))) { + CI->setPredicate(CI->getInversePredicate()); + return CastInst::Create(Opcode, CI, Op0C->getType()); + } + } } - - return ReplaceInstUsesWith(CI, In); } - - - - // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. - // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - // zext (X == 1) to i32 --> X iff X has only the low bit set. - // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set. - // zext (X != 0) to i32 --> X iff X has only the low bit set. - // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. - // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. - // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && - // This only works for EQ and NE - ICI->isEquality()) { - // If Op1C some other power of two, convert: - uint32_t BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); - - APInt KnownZeroMask(~KnownZero); - if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? - if (!DoXform) return ICI; - - bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; - if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { - // (X&4) == 2 --> false - // (X&4) != 2 --> true - Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), - isNE); - Res = ConstantExpr::getZExt(Res, CI.getType()); - return ReplaceInstUsesWith(CI, Res); - } - - uint32_t ShiftAmt = KnownZeroMask.logBase2(); - Value *In = ICI->getOperand(0); - if (ShiftAmt) { - // Perform a logical shr by shiftamt. - // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), - In->getName()+".lobit"); + + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + // ~(c-X) == X-c-1 == X+(-c-1) + if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) + if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { + Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); + Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, + ConstantInt::get(I.getType(), 1)); + return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); } - if ((Op1CV != 0) == isNE) { // Toggle the low bit. - Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, "tmp"); + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { + if (Op0I->getOpcode() == Instruction::Add) { + // ~(X-c) --> (-c-1)-X + if (RHS->isAllOnesValue()) { + Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); + return BinaryOperator::CreateSub( + ConstantExpr::getSub(NegOp0CI, + ConstantInt::get(I.getType(), 1)), + Op0I->getOperand(0)); + } else if (RHS->getValue().isSignBit()) { + // (X + C) ^ signbit -> (X + C + signbit) + Constant *C = ConstantInt::get(I.getContext(), + RHS->getValue() + Op0CI->getValue()); + return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); + + } + } else if (Op0I->getOpcode() == Instruction::Or) { + // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 + if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { + Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); + // Anything in both C1 and C2 is known to be zero, remove it from + // NewRHS. + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + NewRHS = ConstantExpr::getAnd(NewRHS, + ConstantExpr::getNot(CommonBits)); + Worklist.Add(Op0I); + I.setOperand(0, Op0I->getOperand(0)); + I.setOperand(1, NewRHS); + return &I; + } } - - if (CI.getType() == In->getType()) - return ReplaceInstUsesWith(CI, In); - else - return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); } } - } - - // icmp ne A, B is equal to xor A, B when A and B only really have one bit. - // It is also profitable to transform icmp eq into not(xor(A, B)) because that - // may lead to additional simplifications. - if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { - if (const IntegerType *ITy = dyn_cast(CI.getType())) { - uint32_t BitWidth = ITy->getBitWidth(); - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); - - if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { - APInt KnownBits = KnownZeroLHS | KnownOneLHS; - APInt UnknownBit = ~KnownBits; - if (UnknownBit.countPopulation() == 1) { - if (!DoXform) return ICI; - Value *Result = Builder->CreateXor(LHS, RHS); + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } - // Mask off any bits that are set and won't be shifted away. - if (KnownOneLHS.uge(UnknownBit)) - Result = Builder->CreateAnd(Result, - ConstantInt::get(ITy, UnknownBit)); + if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 + if (X == Op1) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - // Shift the bit we're testing down to the lsb. - Result = Builder->CreateLShr( - Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 + if (X == Op0) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); - Result->takeName(ICI); - return ReplaceInstUsesWith(CI, Result); - } + + BinaryOperator *Op1I = dyn_cast(Op1); + if (Op1I) { + Value *A, *B; + if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op0) { // B^(B|A) == (A|B)^B + Op1I->swapOperands(); + I.swapOperands(); + std::swap(Op0, Op1); + } else if (B == Op0) { // B^(A|B) == (A|B)^B + I.swapOperands(); // Simplified below. + std::swap(Op0, Op1); + } + } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { + return ReplaceInstUsesWith(I, B); // A^(A^B) == B + } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { + return ReplaceInstUsesWith(I, A); // A^(B^A) == B + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + Op1I->hasOneUse()){ + if (A == Op0) { // A^(A&B) -> A^(B&A) + Op1I->swapOperands(); + std::swap(A, B); + } + if (B == Op0) { // A^(B&A) -> (B&A)^A + I.swapOperands(); // Simplified below. + std::swap(Op0, Op1); } } } - - return 0; -} - -Instruction *InstCombiner::visitZExt(ZExtInst &CI) { - // If one of the common conversion will work, do it. - if (Instruction *Result = commonIntCastTransforms(CI)) - return Result; - - Value *Src = CI.getOperand(0); - - // If this is a TRUNC followed by a ZEXT then we are dealing with integral - // types and if the sizes are just right we can convert this into a logical - // 'and' which will be much cheaper than the pair of casts. - if (TruncInst *CSrc = dyn_cast(Src)) { // A->B->C cast - // Get the sizes of the types involved. We know that the intermediate type - // will be smaller than A or C, but don't know the relation between A and C. - Value *A = CSrc->getOperand(0); - unsigned SrcSize = A->getType()->getScalarSizeInBits(); - unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); - unsigned DstSize = CI.getType()->getScalarSizeInBits(); - // If we're actually extending zero bits, then if - // SrcSize < DstSize: zext(a & mask) - // SrcSize == DstSize: a & mask - // SrcSize > DstSize: trunc(a) & mask - if (SrcSize < DstSize) { - APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); - return new ZExtInst(And, CI.getType()); - } - - if (SrcSize == DstSize) { - APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), - AndValue)); - } - if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); - APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, - ConstantInt::get(Trunc->getType(), - AndValue)); - } - } - - if (ICmpInst *ICI = dyn_cast(Src)) - return transformZExtICmp(ICI, CI); - - BinaryOperator *SrcI = dyn_cast(Src); - if (SrcI && SrcI->getOpcode() == Instruction::Or) { - // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one - // of the (zext icmp) will be transformed. - ICmpInst *LHS = dyn_cast(SrcI->getOperand(0)); - ICmpInst *RHS = dyn_cast(SrcI->getOperand(1)); - if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && - (transformZExtICmp(LHS, CI, false) || - transformZExtICmp(RHS, CI, false))) { - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); - return BinaryOperator::Create(Instruction::Or, LCast, RCast); - } - } - - // zext(trunc(t) & C) -> (t & zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast(SrcI->getOperand(1))) - if (TruncInst *TI = dyn_cast(SrcI->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) - return - BinaryOperator::CreateAnd(TI0, - ConstantExpr::getZExt(C, CI.getType())); + + BinaryOperator *Op0I = dyn_cast(Op0); + if (Op0I) { + Value *A, *B; + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()) { + if (A == Op1) // (B|A)^B == (A|B)^B + std::swap(A, B); + if (B == Op1) // (A|B)^B == A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); + } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { + return ReplaceInstUsesWith(I, B); // (A^B)^A == B + } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { + return ReplaceInstUsesWith(I, A); // (B^A)^A == B + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()){ + if (A == Op1) // (A&B)^A -> (B&A)^A + std::swap(A, B); + if (B == Op1 && // (B&A)^A == ~B & A + !isa(Op1)) { // Canonical form is (B&C)^C + return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); } - - // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast(SrcI->getOperand(1))) - if (BinaryOperator *And = dyn_cast(SrcI->getOperand(0))) - if (And->getOpcode() == Instruction::And && And->hasOneUse() && - And->getOperand(1) == C) - if (TruncInst *TI = dyn_cast(And->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) { - Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); - return BinaryOperator::CreateXor(NewAnd, ZC); - } - } - - return 0; -} - -Instruction *InstCombiner::visitSExt(SExtInst &CI) { - if (Instruction *I = commonIntCastTransforms(CI)) - return I; - - Value *Src = CI.getOperand(0); - - // Canonicalize sign-extend from i1 to a select. - if (Src->getType() == Type::getInt1Ty(CI.getContext())) - return SelectInst::Create(Src, - Constant::getAllOnesValue(CI.getType()), - Constant::getNullValue(CI.getType())); - - // See if the value being truncated is already sign extended. If so, just - // eliminate the trunc/sext pair. - if (Operator::getOpcode(Src) == Instruction::Trunc) { - Value *Op = cast(Src)->getOperand(0); - unsigned OpBits = Op->getType()->getScalarSizeInBits(); - unsigned MidBits = Src->getType()->getScalarSizeInBits(); - unsigned DestBits = CI.getType()->getScalarSizeInBits(); - unsigned NumSignBits = ComputeNumSignBits(Op); - - if (OpBits == DestBits) { - // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign - // bits, it is already ready. - if (NumSignBits > DestBits-MidBits) - return ReplaceInstUsesWith(CI, Op); - } else if (OpBits < DestBits) { - // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign - // bits, just sext from i32. - if (NumSignBits > OpBits-MidBits) - return new SExtInst(Op, CI.getType(), "tmp"); - } else { - // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign - // bits, just truncate to i32. - if (NumSignBits > OpBits-MidBits) - return new TruncInst(Op, CI.getType(), "tmp"); - } - } - - // If the input is a shl/ashr pair of a same constant, then this is a sign - // extension from a smaller value. If we could trust arbitrary bitwidth - // integers, we could turn this into a truncate to the smaller bit and then - // use a sext for the whole extension. Since we don't, look deeper and check - // for a truncate. If the source and dest are the same type, eliminate the - // trunc and extend and just do shifts. For example, turn: - // %a = trunc i32 %i to i8 - // %b = shl i8 %a, 6 - // %c = ashr i8 %b, 6 - // %d = sext i8 %c to i32 - // into: - // %a = shl i32 %i, 30 - // %d = ashr i32 %a, 30 - Value *A = 0; - ConstantInt *BA = 0, *CA = 0; - if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)), - m_ConstantInt(CA))) && - BA == CA && isa(A)) { - Value *I = cast(A)->getOperand(0); - if (I->getType() == CI.getType()) { - unsigned MidSize = Src->getType()->getScalarSizeInBits(); - unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); - unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; - Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - I = Builder->CreateShl(I, ShAmtV, CI.getName()); - return BinaryOperator::CreateAShr(I, ShAmtV); } } - return 0; -} - -/// FitsInFPType - Return a Constant* for the specified FP constant if it fits -/// in the specified FP type without changing its value. -static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { - bool losesInfo; - APFloat F = CFP->getValueAPF(); - (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); - if (!losesInfo) - return ConstantFP::get(CFP->getContext(), F); - return 0; -} - -/// LookThroughFPExtensions - If this is an fp extension instruction, look -/// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V) { - if (Instruction *I = dyn_cast(V)) - if (I->getOpcode() == Instruction::FPExt) - return LookThroughFPExtensions(I->getOperand(0)); - - // If this value is a constant, return the constant in the smallest FP type - // that can accurately represent it. This allows us to turn - // (float)((double)X+2.0) into x+2.0f. - if (ConstantFP *CFP = dyn_cast(V)) { - if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) - return V; // No constant folding of this. - // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) - return V; - if (CFP->getType() == Type::getDoubleTy(V->getContext())) - return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) - return V; - // Don't try to shrink to various long double types. - } - - return V; -} + // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. + if (Op0I && Op1I && Op0I->isShift() && + Op0I->getOpcode() == Op1I->getOpcode() && + Op0I->getOperand(1) == Op1I->getOperand(1) && + (Op1I->hasOneUse() || Op1I->hasOneUse())) { + Value *NewOp = + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); + return BinaryOperator::Create(Op1I->getOpcode(), NewOp, + Op1I->getOperand(1)); + } + + if (Op0I && Op1I) { + Value *A, *B, *C, *D; + // (A & B)^(A | B) -> A ^ B + if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + match(Op1I, m_Or(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + // (A | B)^(A & B) -> A ^ B + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + match(Op1I, m_And(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + + // (A & B)^(C & D) + if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && + match(Op0I, m_And(m_Value(A), m_Value(B))) && + match(Op1I, m_And(m_Value(C), m_Value(D)))) { + // (X & Y)^(X & Y) -> (Y^Z) & X + Value *X = 0, *Y = 0, *Z = 0; + if (A == C) + X = A, Y = B, Z = D; + else if (A == D) + X = A, Y = B, Z = C; + else if (B == C) + X = B, Y = A, Z = D; + else if (B == D) + X = B, Y = A, Z = C; + + if (X) { + Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); + return BinaryOperator::CreateAnd(NewOp, X); + } + } + } + + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) + if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) + return R; -Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { - if (Instruction *I = commonCastTransforms(CI)) - return I; - - // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are - // smaller than the destination type, we can eliminate the truncate by doing - // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as - // many builtins (sqrt, etc). - BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); - if (OpI && OpI->hasOneUse()) { - switch (OpI->getOpcode()) { - default: break; - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - const Type *SrcTy = OpI->getType(); - Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); - Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && - RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getScalarSizeInBits(); - // If the source types were both smaller than the destination type of - // the cast, do this xform. - if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && - RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); - RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); - return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); + // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) { + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + // Only do this if the casts both really cause code to be generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } - break; - } } - return 0; -} -Instruction *InstCombiner::visitFPExt(CastInst &CI) { - return commonCastTransforms(CI); + return Changed ? &I : 0; } -Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { - Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptoui(uitofp(X)) --> X - // fptoui(sitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); -} -Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { - Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptosi(sitofp(X)) --> X - // fptosi(uitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() <= - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); +Instruction *InstCombiner::visitShl(BinaryOperator &I) { + return commonShiftTransforms(I); } -Instruction *InstCombiner::visitUIToFP(CastInst &CI) { - return commonCastTransforms(CI); +Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + return commonShiftTransforms(I); } -Instruction *InstCombiner::visitSIToFP(CastInst &CI) { - return commonCastTransforms(CI); -} +Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0); + + // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) + if (ConstantInt *CSI = dyn_cast(Op0)) + if (CSI->isAllOnesValue()) + return ReplaceInstUsesWith(I, CSI); -Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { - // If the destination integer type is smaller than the intptr_t type for - // this target, do a ptrtoint to intptr_t then do a trunc. This allows the - // trunc to be exposed to other transforms. Don't do this for extending - // ptrtoint's, because we don't know if the target sign or zero extends its - // pointers. - if (TD && - CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); - return new TruncInst(P, CI.getType()); - } - - return commonPointerCastTransforms(CI); -} + // See if we can turn a signed shr into an unsigned shr. + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); -Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { - // If the source integer type is larger than the intptr_t type for - // this target, do a trunc to the intptr_t type, then inttoptr of it. This - // allows the trunc to be exposed to other transforms. Don't do this for - // extending inttoptr's, because we don't know if the target sign or zero - // extends to pointers. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); - return new IntToPtrInst(P, CI.getType()); - } - - if (Instruction *I = commonCastTransforms(CI)) - return I; + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); return 0; } -Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { - // If the operands are integer typed then apply the integer transforms, - // otherwise just apply the common ones. - Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); +Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { + assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa(SrcTy)) { - if (Instruction *I = commonPointerCastTransforms(CI)) - return I; - } else { - if (Instruction *Result = commonCastTransforms(CI)) - return Result; + // shl X, 0 == X and shr X, 0 == X + // shl 0, X == 0 and shr 0, X == 0 + if (Op1 == Constant::getNullValue(Op1->getType()) || + Op0 == Constant::getNullValue(Op0->getType())) + return ReplaceInstUsesWith(I, Op0); + + if (isa(Op0)) { + if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef + return ReplaceInstUsesWith(I, Op0); + else // undef << X -> 0, undef >>u X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } - - - // Get rid of casts from one type to the same type. These are useless and can - // be replaced by the operand. - if (DestTy == Src->getType()) - return ReplaceInstUsesWith(CI, Src); - - if (const PointerType *DstPTy = dyn_cast(DestTy)) { - const PointerType *SrcPTy = cast(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); - - // If the address spaces don't match, don't eliminate the bitcast, which is - // required for changing types. - if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) - return 0; - - // If we are casting a alloca to a pointer to a type of the same - // size, rewrite the allocation instruction to allocate the "right" type. - // There is no need to modify malloc calls because it is their bitcast that - // needs to be cleaned up. - if (AllocaInst *AI = dyn_cast(Src)) - if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) - return V; - - // If the source and destination are pointers, and this cast is equivalent - // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. - // This can enhance SROA and other transforms that want type-safe pointers. - Constant *ZeroUInt = - Constant::getNullValue(Type::getInt32Ty(CI.getContext())); - unsigned NumZeros = 0; - while (SrcElTy != DstElTy && - isa(SrcElTy) && !isa(SrcElTy) && - SrcElTy->getNumContainedTypes() /* not "{}" */) { - SrcElTy = cast(SrcElTy)->getTypeAtIndex(ZeroUInt); - ++NumZeros; - } - - // If we found a path from the src to dest, create the getelementptr now. - if (SrcElTy == DstElTy) { - SmallVector Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"", - ((Instruction*) NULL)); - } - } - - if (const VectorType *DestVTy = dyn_cast(DestTy)) { - if (DestVTy->getNumElements() == 1) { - if (!isa(SrcTy)) { - Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); - return InsertElementInst::Create(UndefValue::get(DestTy), Elem, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - } - // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) - } + if (isa(Op1)) { + if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X + return ReplaceInstUsesWith(I, Op0); + else // X << undef, X >>u undef -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } - if (const VectorType *SrcVTy = dyn_cast(SrcTy)) { - if (SrcVTy->getNumElements() == 1) { - if (!isa(DestTy)) { - Value *Elem = - Builder->CreateExtractElement(Src, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - return CastInst::Create(Instruction::BitCast, Elem, DestTy); - } - } - } + // See if we can fold away this shift. + if (SimplifyDemandedInstructionBits(I)) + return &I; - if (ShuffleVectorInst *SVI = dyn_cast(Src)) { - if (SVI->hasOneUse()) { - // Okay, we have (bitconvert (shuffle ..)). Check to see if this is - // a bitconvert to a vector with the same # elts. - if (isa(DestTy) && - cast(DestTy)->getNumElements() == - SVI->getType()->getNumElements() && - SVI->getType()->getNumElements() == - cast(SVI->getOperand(0)->getType())->getNumElements()) { - CastInst *Tmp; - // If either of the operands is a cast from CI.getType(), then - // evaluating the shuffle in the casted destination's type will allow - // us to eliminate at least one cast. - if (((Tmp = dyn_cast(SVI->getOperand(0))) && - Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast(SVI->getOperand(1))) && - Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); - // Return a new shuffle vector. Use the same element ID's, as we - // know the vector types match #elts. - return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); - } - } - } - } - return 0; -} + // Try to fold constant and into select arguments. + if (isa(Op0)) + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; -/// GetSelectFoldableOperands - We want to turn code that looks like this: -/// %C = or %A, %B -/// %D = select %cond, %C, %A -/// into: -/// %C = select %cond, %B, 0 -/// %D = or %A, %C -/// -/// Assuming that the specified instruction is an operand to the select, return -/// a bitmask indicating which operands of this instruction are foldable if they -/// equal the other incoming value of the select. -/// -static unsigned GetSelectFoldableOperands(Instruction *I) { - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - return 3; // Can fold through either operand. - case Instruction::Sub: // Can only fold on the amount subtracted. - case Instruction::Shl: // Can only fold on the shift amount. - case Instruction::LShr: - case Instruction::AShr: - return 1; - default: - return 0; // Cannot fold - } + if (ConstantInt *CUI = dyn_cast(Op1)) + if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) + return Res; + return 0; } -/// GetSelectFoldableConstant - For the same transformation as the previous -/// function, return the identity constant that goes into the select. -static Constant *GetSelectFoldableConstant(Instruction *I) { - switch (I->getOpcode()) { - default: llvm_unreachable("This cannot happen!"); - case Instruction::Add: - case Instruction::Sub: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - return Constant::getNullValue(I->getType()); - case Instruction::And: - return Constant::getAllOnesValue(I->getType()); - case Instruction::Mul: - return ConstantInt::get(I->getType(), 1); - } -} +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I) { + bool isLeftShift = I.getOpcode() == Instruction::Shl; -/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI -/// have the same opcode and only one use each. Try to simplify this. -Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI) { - if (TI->getNumOperands() == 1) { - // If this is a non-volatile load or a cast from the same type, - // merge. - if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) - return 0; - } else { - return 0; // unknown unary op. + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); + + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. + // + if (Op1->uge(TypeBits)) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); + else { + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; } - - // Fold this by inserting a select from the input values. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName()+".v"); - InsertNewInstBefore(NewSI, SI); - return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, - TI->getType()); } + + // ((X*C1) << C2) == (X * (C1 << C2)) + if (BinaryOperator *BO = dyn_cast(Op0)) + if (BO->getOpcode() == Instruction::Mul && isLeftShift) + if (Constant *BOOp = dyn_cast(BO->getOperand(1))) + return BinaryOperator::CreateMul(BO->getOperand(0), + ConstantExpr::getShl(BOOp, Op1)); + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) + if (TruncInst *TI = dyn_cast(Op0)) { + Instruction *TrOp = dyn_cast(TI->getOperand(0)); + // If 'shift2' is an ashr, we would have to get the sign bit into a funny + // place. Don't try to do this transformation in this case. Also, we + // require that the input operand is a shift-by-constant so that we have + // confidence that the shifts will get folded together. We could do this + // xform in more cases, but it is unlikely to be profitable. + if (TrOp && I.isLogicalShift() && TrOp->isShift() && + isa(TrOp->getOperand(1))) { + // Okay, we'll do this xform. Make the shift of shift. + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); - // Only handle binary operators here. - if (!isa(TI)) - return 0; - - // Figure out if the operations have any operands in common. - Value *MatchOp, *OtherOpT, *OtherOpF; - bool MatchIsOpZero; - if (TI->getOperand(0) == FI->getOperand(0)) { - MatchOp = TI->getOperand(0); - OtherOpT = TI->getOperand(1); - OtherOpF = FI->getOperand(1); - MatchIsOpZero = true; - } else if (TI->getOperand(1) == FI->getOperand(1)) { - MatchOp = TI->getOperand(1); - OtherOpT = TI->getOperand(0); - OtherOpF = FI->getOperand(0); - MatchIsOpZero = false; - } else if (!TI->isCommutative()) { - return 0; - } else if (TI->getOperand(0) == FI->getOperand(1)) { - MatchOp = TI->getOperand(0); - OtherOpT = TI->getOperand(1); - OtherOpF = FI->getOperand(0); - MatchIsOpZero = true; - } else if (TI->getOperand(1) == FI->getOperand(0)) { - MatchOp = TI->getOperand(1); - OtherOpT = TI->getOperand(0); - OtherOpF = FI->getOperand(1); - MatchIsOpZero = true; - } else { - return 0; - } + // For logical shifts, the truncation has the effect of making the high + // part of the register be zeros. Emulate this by inserting an AND to + // clear the top bits as needed. This 'and' will usually be zapped by + // other xforms later if dead. + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); + APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); + + // The mask we constructed says what the trunc would do if occurring + // between the shifts. We want to know the effect *after* the second + // shift. We know that it is a logical shift by a constant, so adjust the + // mask as appropriate. + if (I.getOpcode() == Instruction::Shl) + MaskV <<= Op1->getZExtValue(); + else { + assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); + MaskV = MaskV.lshr(Op1->getZExtValue()); + } - // If we reach here, they do have operations in common. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, - OtherOpF, SI.getName()+".v"); - InsertNewInstBefore(NewSI, SI); + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); - if (BinaryOperator *BO = dyn_cast(TI)) { - if (MatchIsOpZero) - return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI); - else - return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); + // Return the value truncated to the interesting size. + return new TruncInst(And, I.getType()); + } } - llvm_unreachable("Shouldn't get here"); - return 0; -} - -static bool isSelect01(Constant *C1, Constant *C2) { - ConstantInt *C1I = dyn_cast(C1); - if (!C1I) - return false; - ConstantInt *C2I = dyn_cast(C2); - if (!C2I) - return false; - return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); -} - -/// FoldSelectIntoOp - Try fold the select into one of the operands to -/// facilitate further optimization. -Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, - Value *FalseVal) { - // See the comment above GetSelectFoldableOperands for a description of the - // transformation we are doing here. - if (Instruction *TVI = dyn_cast(TrueVal)) { - if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && - !isa(FalseVal)) { - if (unsigned SFO = GetSelectFoldableOperands(TVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { - OpToFold = 2; + + if (Op0->hasOneUse()) { + if (BinaryOperator *Op0BO = dyn_cast(Op0)) { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + Value *V1, *V2; + ConstantInt *CC; + switch (Op0BO->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // These operators commute. + // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && + match(Op0BO->getOperand(1), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) + Value *Op0BOOp1 = Op0BO->getOperand(1); + if (isLeftShift && Op0BOOp1->hasOneUse() && + match(Op0BOOp1, + m_And(m_Shr(m_Value(V1), m_Specific(Op1)), + m_ConstantInt(CC))) && + cast(Op0BOOp1)->getOperand(0)->hasOneUse()) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); + } } - - if (OpToFold) { - Constant *C = GetSelectFoldableConstant(TVI); - Value *OOp = TVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. - if (!isa(OOp) || isSelect01(C, cast(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); - InsertNewInstBefore(NewSel, SI); - NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + + // FALL THROUGH. + case Instruction::Sub: { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), + m_And(m_Shr(m_Value(V1), m_Value(V2)), + m_ConstantInt(CC))) && V2 == Op1 && + cast(Op0BO->getOperand(0)) + ->getOperand(0)->hasOneUse()) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } + + break; } } - } - } - - if (Instruction *FVI = dyn_cast(FalseVal)) { - if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && - !isa(TrueVal)) { - if (unsigned SFO = GetSelectFoldableOperands(FVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { - OpToFold = 2; + + + // If the operand is an bitwise operator with a constant RHS, and the + // shift is the only use, we can pull it out of the shift. + if (ConstantInt *Op0C = dyn_cast(Op0BO->getOperand(1))) { + bool isValid = true; // Valid only for And, Or, Xor + bool highBitSet = false; // Transform if high bit of constant set? + + switch (Op0BO->getOpcode()) { + default: isValid = false; break; // Do not perform transform! + case Instruction::Add: + isValid = isLeftShift; + break; + case Instruction::Or: + case Instruction::Xor: + highBitSet = false; + break; + case Instruction::And: + highBitSet = true; + break; } - - if (OpToFold) { - Constant *C = GetSelectFoldableConstant(FVI); - Value *OOp = FVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. - if (!isa(OOp) || isSelect01(C, cast(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); - InsertNewInstBefore(NewSel, SI); - NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); - } + + // If this is a signed shift right, and the high bit is modified + // by the logical operation, do not perform the transformation. + // The highBitSet boolean indicates the value of the high bit of + // the constant which would cause it to be modified for this + // operation. + // + if (isValid && I.getOpcode() == Instruction::AShr) + isValid = Op0C->getValue()[TypeBits-1] == highBitSet; + + if (isValid) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); + + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + NewShift->takeName(Op0BO); + + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, + NewRHS); } } } } - - return 0; -} - -/// visitSelectInstWithICmp - Visit a SelectInst that has an -/// ICmpInst as its first operand. -/// -Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, - ICmpInst *ICI) { - bool Changed = false; - ICmpInst::Predicate Pred = ICI->getPredicate(); - Value *CmpLHS = ICI->getOperand(0); - Value *CmpRHS = ICI->getOperand(1); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - - // Check cases where the comparison is with a constant that - // can be adjusted to fit the min/max idiom. We may edit ICI in - // place here, so make sure the select is the only user. - if (ICI->hasOneUse()) - if (ConstantInt *CI = dyn_cast(CmpRHS)) { - switch (Pred) { - default: break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: { - // X < MIN ? T : F --> F - if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = SubOne(CI); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: { - // X > MAX ? T : F --> F - if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = AddOne(CI); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } - } - - // (x ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; - if (match(TrueVal, m_ConstantInt<-1>()) && - match(FalseVal, m_ConstantInt<0>())) - Pred = ICI->getPredicate(); - else if (match(TrueVal, m_ConstantInt<0>()) && - match(FalseVal, m_ConstantInt<-1>())) - Pred = CmpInst::getInversePredicate(ICI->getPredicate()); - - if (Pred != CmpInst::BAD_ICMP_PREDICATE) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - const APInt &Op1CV = CI->getValue(); + + // Find out if this is a shift of a shift by a constant. + BinaryOperator *ShiftOp = dyn_cast(Op0); + if (ShiftOp && !ShiftOp->isShift()) + ShiftOp = 0; + + if (ShiftOp && isa(ShiftOp->getOperand(1))) { + ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); + uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); + if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + Value *X = ShiftOp->getOperand(0); - // sext (x x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || - (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { - Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), - *ICI); - if (In->getType() != SI.getType()) - In = CastInst::CreateIntegerCast(In, SI.getType(), - true/*SExt*/, "tmp", ICI); + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - if (Pred == ICmpInst::ICMP_SGT) - In = InsertNewInstBefore(BinaryOperator::CreateNot(In, - In->getName()+".not"), *ICI); + const IntegerType *Ty = cast(I.getType()); - return ReplaceInstUsesWith(SI, In); - } + // Check for (X << c1) << c2 and (X >> c1) >> c2 + if (I.getOpcode() == ShiftOp->getOpcode()) { + // If this is oversized composite shift, then unsigned shifts get 0, ashr + // saturates. + if (AmtSum >= TypeBits) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } + + return BinaryOperator::Create(I.getOpcode(), X, + ConstantInt::get(Ty, AmtSum)); } - - if (CmpLHS == TrueVal && CmpRHS == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? X : Y -> X - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - - } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) { - // Transform (X == Y) ? Y : X -> X - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? Y : X -> Y - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - } - return Changed ? &SI : 0; -} - - -/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a -/// PHI node (but the two may be in different blocks). See if the true/false -/// values (V) are live in all of the predecessor blocks of the PHI. For -/// example, cases like this cannot be mapped: -/// -/// X = phi [ C1, BB1], [C2, BB2] -/// Y = add -/// Z = select X, Y, 0 -/// -/// because Y is not live in BB1/BB2. -/// -static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, - const SelectInst &SI) { - // If the value is a non-instruction value like a constant or argument, it - // can always be mapped. - const Instruction *I = dyn_cast(V); - if (I == 0) return true; - - // If V is a PHI node defined in the same block as the condition PHI, we can - // map the arguments. - const PHINode *CondPHI = cast(SI.getCondition()); - - if (const PHINode *VP = dyn_cast(I)) - if (VP->getParent() == CondPHI->getParent()) - return true; - - // Otherwise, if the PHI and select are defined in the same block and if V is - // defined in a different block, then we can transform it. - if (SI.getParent() == CondPHI->getParent() && - I->getParent() != CondPHI->getParent()) - return true; - - // Otherwise we have a 'hard' case and we can't tell without doing more - // detailed dominator based analysis, punt. - return false; -} - -/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) -Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, - SelectPatternFlavor SPF1, - Value *A, Value *B, - Instruction &Outer, - SelectPatternFlavor SPF2, Value *C) { - if (C == A || C == B) { - // MAX(MAX(A, B), B) -> MAX(A, B) - // MIN(MIN(a, b), a) -> MIN(a, b) - if (SPF1 == SPF2) - return ReplaceInstUsesWith(Outer, Inner); - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || - (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || - (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || - (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) - return ReplaceInstUsesWith(Outer, C); - } - - // TODO: MIN(MIN(A, 23), 97) - return 0; -} - - - - -Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { - Value *CondVal = SI.getCondition(); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - - // select true, X, Y -> X - // select false, X, Y -> Y - if (ConstantInt *C = dyn_cast(CondVal)) - return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); - - // select C, X, X -> X - if (TrueVal == FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - - if (isa(TrueVal)) // select C, undef, X -> X - return ReplaceInstUsesWith(SI, FalseVal); - if (isa(FalseVal)) // select C, X, undef -> X - return ReplaceInstUsesWith(SI, TrueVal); - if (isa(CondVal)) { // select undef, X, Y -> X or Y - if (isa(TrueVal)) - return ReplaceInstUsesWith(SI, TrueVal); - else - return ReplaceInstUsesWith(SI, FalseVal); - } + if (ShiftOp->getOpcode() == Instruction::LShr && + I.getOpcode() == Instruction::AShr) { + if (AmtSum >= TypeBits) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::AShr && + I.getOpcode() == Instruction::LShr) { + // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. + if (AmtSum >= TypeBits) + AmtSum = TypeBits-1; + + Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - if (SI.getType() == Type::getInt1Ty(SI.getContext())) { - if (ConstantInt *C = dyn_cast(TrueVal)) { - if (C->getZExtValue()) { - // Change: A = select B, true, C --> A = or B, C - return BinaryOperator::CreateOr(CondVal, FalseVal); - } else { - // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateAnd(NotCond, FalseVal); - } - } else if (ConstantInt *C = dyn_cast(FalseVal)) { - if (C->getZExtValue() == false) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } else { - // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateOr(NotCond, TrueVal); - } + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(), Mask)); } - // select a, b, a -> a&b - // select a, a, b -> a|b - if (CondVal == TrueVal) - return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } - - // Selecting between two integer constants? - if (ConstantInt *TrueValC = dyn_cast(TrueVal)) - if (ConstantInt *FalseValC = dyn_cast(FalseVal)) { - // select C, 1, 0 -> zext C to int - if (FalseValC->isZero() && TrueValC->getValue() == 1) { - return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { - // select C, 0, 1 -> zext !C to int - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); + // Okay, if we get here, one shift must be left, and the other shift must be + // right. See if the amounts are equal. + if (ShiftAmt1 == ShiftAmt2) { + // If we have ((X >>? C) << C), turn this into X & (-1 << C). + if (I.getOpcode() == Instruction::Shl) { + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(),Mask)); } - - if (ICmpInst *IC = dyn_cast(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa(IC->getOperand(1)) && - cast(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - isOneBitSet(cast(ICA->getOperand(1)))) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = InsertNewInstBefore(BinaryOperator::Create( - Instruction::Xor, V, ICA->getOperand(1)), SI); - return ReplaceInstUsesWith(SI, V); - } + // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). + if (I.getOpcode() == Instruction::LShr) { + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(), Mask)); } - } + // We can simplify ((X << C) >>s C) into a trunc + sext. + // NOTE: we could do this for any C, but that would make 'unusual' integer + // types. For now, just stick to ones well-supported by the code + // generators. + const Type *SExtType = 0; + switch (Ty->getBitWidth() - ShiftAmt1) { + case 1 : + case 8 : + case 16 : + case 32 : + case 64 : + case 128: + SExtType = IntegerType::get(I.getContext(), + Ty->getBitWidth() - ShiftAmt1); + break; + default: break; + } + if (SExtType) + return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); + // Otherwise, we can't handle it yet. + } else if (ShiftAmt1 < ShiftAmt2) { + uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; + + // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. + } else { + assert(ShiftAmt2 < ShiftAmt1); + uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; - // See if we are selecting two values based on a comparison of the two values. - if (FCmpInst *FCI = dyn_cast(CondVal)) { - if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); + // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, + ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); } - // Transform (X != Y) ? X : Y -> X - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX - - } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ - // Transform (X == Y) ? Y : X -> X - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); + + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); } - // Transform (X != Y) ? Y : X -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX + + // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. } - // NOTE: if we wanted to, this is where to detect ABS } + return 0; +} - // See if we are selecting two values based on a comparison of the two values. - if (ICmpInst *ICI = dyn_cast(CondVal)) - if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) - return Result; - - if (Instruction *TI = dyn_cast(TrueVal)) - if (Instruction *FI = dyn_cast(FalseVal)) - if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; - - // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) - if (TI->getOpcode() == FI->getOpcode()) - if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) - return IV; - - // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is - // even legal for FP. - if ((TI->getOpcode() == Instruction::Sub && - FI->getOpcode() == Instruction::Add) || - (TI->getOpcode() == Instruction::FSub && - FI->getOpcode() == Instruction::FAdd)) { - AddOp = FI; SubOp = TI; - } else if ((FI->getOpcode() == Instruction::Sub && - TI->getOpcode() == Instruction::Add) || - (FI->getOpcode() == Instruction::FSub && - TI->getOpcode() == Instruction::FAdd)) { - AddOp = TI; SubOp = FI; - } - - if (AddOp) { - Value *OtherAddOp = 0; - if (SubOp->getOperand(0) == AddOp->getOperand(0)) { - OtherAddOp = AddOp->getOperand(1); - } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { - OtherAddOp = AddOp->getOperand(0); - } - - if (OtherAddOp) { - // So at this point we know we have (Y -> OtherAddOp): - // select C, (add X, Y), (sub X, Z) - Value *NegVal; // Compute -Z - if (Constant *C = dyn_cast(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); - } else { - NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1), - "tmp"), SI); - } - - Value *NewTrueOp = OtherAddOp; - Value *NewFalseOp = NegVal; - if (AddOp != TI) - std::swap(NewTrueOp, NewFalseOp); - Instruction *NewSel = - SelectInst::Create(CondVal, NewTrueOp, - NewFalseOp, SI.getName() + ".p"); - NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); - } - } - } - // See if we can fold the select into one of our operands. - if (SI.getType()->isInteger()) { - if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) - return FoldI; +/// FindElementAtOffset - Given a type and a constant offset, determine whether +/// or not there is a sequence of GEP indices into the type that will land us at +/// the specified offset. If so, fill them into NewIndices and return the +/// resultant element type, otherwise return null. +const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, + SmallVectorImpl &NewIndices) { + if (!TD) return 0; + if (!Ty->isSized()) return 0; + + // Start with the index over the outer type. Note that the type size + // might be zero (even if the offset isn't zero) if the indexed type + // is something like [0 x {int, int}] + const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + int64_t FirstIdx = 0; + if (int64_t TySize = TD->getTypeAllocSize(Ty)) { + FirstIdx = Offset/TySize; + Offset -= FirstIdx*TySize; - // MAX(MAX(a, b), a) -> MAX(a, b) - // MIN(MIN(a, b), a) -> MIN(a, b) - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - Value *LHS, *RHS, *LHS2, *RHS2; - if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { - if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, - SI, SPF, RHS)) - return R; - if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, - SI, SPF, LHS)) - return R; + // Handle hosts where % returns negative instead of values [0..TySize). + if (Offset < 0) { + --FirstIdx; + Offset += TySize; + assert(Offset >= 0); } - - // TODO. - // ABS(-X) -> ABS(X) - // ABS(ABS(X)) -> ABS(X) + assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - - // See if we can fold the select into a phi node if the condition is a select. - if (isa(SI.getCondition())) - // The true/false values have to be live in the PHI predecessor's blocks. - if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && - CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) - if (Instruction *NV = FoldOpIntoPhi(SI)) - return NV; - - if (BinaryOperator::isNot(CondVal)) { - SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); - SI.setOperand(1, FalseVal); - SI.setOperand(2, TrueVal); - return &SI; + + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); + + // Index into the types. If we fail, set OrigBase to null. + while (Offset) { + // Indexing into tail padding between struct/array elements. + if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) + return 0; + + if (const StructType *STy = dyn_cast(Ty)) { + const StructLayout *SL = TD->getStructLayout(STy); + assert(Offset < (int64_t)SL->getSizeInBytes() && + "Offset must stay within the indexed type"); + + unsigned Elt = SL->getElementContainingOffset(Offset); + NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + Elt)); + + Offset -= SL->getElementOffset(Elt); + Ty = STy->getElementType(Elt); + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); + assert(EltSize && "Cannot index into a zero-sized array"); + NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + Offset %= EltSize; + Ty = AT->getElementType(); + } else { + // Otherwise, we can't index into the middle of this atomic type, bail. + return 0; + } } - - return 0; + + return Ty; } + /// EnforceKnownAlignment - If the specified pointer points to an object that /// we control, modify the object's alignment to PrefAlign. This isn't /// often possible though. If alignment is important, a more reliable approach @@ -10791,923 +5334,102 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (NestVal->getType() != NestTy) NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); - } - - if (I == E) - break; - - // Add the original argument and attributes. - NewArgs.push_back(*I); - if (Attributes Attr = Attrs.getParamAttributes(Idx)) - NewAttrs.push_back - (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); - - ++Idx, ++I; - } while (1); - } - - // Add any function attributes. - if (Attributes Attr = Attrs.getFnAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); - - // The trampoline may have been bitcast to a bogus type (FTy). - // Handle this by synthesizing a new function type, equal to FTy - // with the chain parameter inserted. - - std::vector NewTypes; - NewTypes.reserve(FTy->getNumParams()+1); - - // Insert the chain's type into the list of parameter types, which may - // mean appending it. - { - unsigned Idx = 1; - FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); - - do { - if (Idx == NestIdx) - // Add the chain's type. - NewTypes.push_back(NestTy); - - if (I == E) - break; - - // Add the original type. - NewTypes.push_back(*I); - - ++Idx, ++I; - } while (1); - } - - // Replace the trampoline call with a direct call. Let the generic - // code sort out any function type mismatches. - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, - FTy->isVarArg()); - Constant *NewCallee = - NestF->getType() == PointerType::getUnqual(NewFTy) ? - NestF : ConstantExpr::getBitCast(NestF, - PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), - NewAttrs.end()); - - Instruction *NewCaller; - if (InvokeInst *II = dyn_cast(Caller)) { - NewCaller = InvokeInst::Create(NewCallee, - II->getNormalDest(), II->getUnwindDest(), - NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - cast(NewCaller)->setCallingConv(II->getCallingConv()); - cast(NewCaller)->setAttributes(NewPAL); - } else { - NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - if (cast(Caller)->isTailCall()) - cast(NewCaller)->setTailCall(); - cast(NewCaller)-> - setCallingConv(cast(Caller)->getCallingConv()); - cast(NewCaller)->setAttributes(NewPAL); - } - if (!Caller->getType()->isVoidTy()) - Caller->replaceAllUsesWith(NewCaller); - Caller->eraseFromParent(); - Worklist.Remove(Caller); - return 0; - } - } - - // Replace the trampoline call with a direct call. Since there is no 'nest' - // parameter, there is no need to adjust the argument list. Let the generic - // code sort out any function type mismatches. - Constant *NewCallee = - NestF->getType() == PTy ? NestF : - ConstantExpr::getBitCast(NestF, PTy); - CS.setCalledFunction(NewCallee); - return CS.getInstruction(); -} - -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] -/// and if a/b/c and the add's all have a single use, turn this into a phi -/// and a single binop. -Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast(PN.getIncomingValue(0)); - assert(isa(FirstInst) || isa(FirstInst)); - unsigned Opc = FirstInst->getOpcode(); - Value *LHSVal = FirstInst->getOperand(0); - Value *RHSVal = FirstInst->getOperand(1); - - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (!I || I->getOpcode() != Opc || !I->hasOneUse() || - // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. - I->getOperand(0)->getType() != LHSType || - I->getOperand(1)->getType() != RHSType) - return 0; - - // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast(I)->getPredicate() != - cast(FirstInst)->getPredicate()) - return 0; - - // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; - } - - // If both LHS and RHS would need a PHI, don't do this transformation, - // because it would increase the number of PHIs entering the block, - // which leads to higher register pressure. This is especially - // bad when the PHIs are in the header of a loop. - if (!LHSVal && !RHSVal) - return 0; - - // Otherwise, this is safe to transform! - - Value *InLHS = FirstInst->getOperand(0); - Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, - FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); - NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewLHS, PN); - LHSVal = NewLHS; - } - - if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, - FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); - NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewRHS, PN); - RHSVal = NewRHS; - } - - // Add all operands to the new PHIs. - if (NewLHS || NewRHS) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *InInst = cast(PN.getIncomingValue(i)); - if (NewLHS) { - Value *NewInLHS = InInst->getOperand(0); - NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); - } - if (NewRHS) { - Value *NewInRHS = InInst->getOperand(1); - NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); - } - } - } - - if (BinaryOperator *BinOp = dyn_cast(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); -} - -Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { - GetElementPtrInst *FirstInst =cast(PN.getIncomingValue(0)); - - SmallVector FixedOperands(FirstInst->op_begin(), - FirstInst->op_end()); - // This is true if all GEP bases are allocas and if all indices into them are - // constants. - bool AllBasePointersAreAllocas = true; - - // We don't want to replace this phi if the replacement would require - // more than one phi, which leads to higher register pressure. This is - // especially bad when the PHIs are in the header of a loop. - bool NeededPhi = false; - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); - if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || - GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; - - // Keep track of whether or not all GEPs are of alloca pointers. - if (AllBasePointersAreAllocas && - (!isa(GEP->getOperand(0)) || - !GEP->hasAllConstantIndices())) - AllBasePointersAreAllocas = false; - - // Compare the operand lists. - for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { - if (FirstInst->getOperand(op) == GEP->getOperand(op)) - continue; - - // Don't merge two GEPs when two operands differ (introducing phi nodes) - // if one of the PHIs has a constant for the index. The index may be - // substantially cheaper to compute for the constants, so making it a - // variable index could pessimize the path. This also handles the case - // for struct indices, which must always be constant. - if (isa(FirstInst->getOperand(op)) || - isa(GEP->getOperand(op))) - return 0; - - if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; - - // If we already needed a PHI for an earlier operand, and another operand - // also requires a PHI, we'd be introducing more PHIs than we're - // eliminating, which increases register pressure on entry to the PHI's - // block. - if (NeededPhi) - return 0; - - FixedOperands[op] = 0; // Needs a PHI. - NeededPhi = true; - } - } - - // If all of the base pointers of the PHI'd GEPs are from allocas, don't - // bother doing this transformation. At best, this will just save a bit of - // offset calculation, but all the predecessors will have to materialize the - // stack address into a register anyway. We'd actually rather *clone* the - // load up into the predecessors so that we have a load of a gep of an alloca, - // which can usually all be folded into the load. - if (AllBasePointersAreAllocas) - return 0; - - // Otherwise, this is safe to transform. Insert PHI nodes for each operand - // that is variable. - SmallVector OperandPhis(FixedOperands.size()); - - bool HasAnyPHIs = false; - for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { - if (FixedOperands[i]) continue; // operand doesn't need a phi. - Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), - FirstOp->getName()+".pn"); - InsertNewInstBefore(NewPN, PN); - - NewPN->reserveOperandSpace(e); - NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); - OperandPhis[i] = NewPN; - FixedOperands[i] = NewPN; - HasAnyPHIs = true; - } - - - // Add all operands to the new PHIs. - if (HasAnyPHIs) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - GetElementPtrInst *InGEP =cast(PN.getIncomingValue(i)); - BasicBlock *InBB = PN.getIncomingBlock(i); - - for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) - if (PHINode *OpPhi = OperandPhis[op]) - OpPhi->addIncoming(InGEP->getOperand(op), InBB); - } - } - - Value *Base = FixedOperands[0]; - return cast(FirstInst)->isInBounds() ? - GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, - FixedOperands.end()) : - GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); -} - - -/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to -/// sink the load out of the block that defines it. This means that it must be -/// obvious the value of the load is not changed from the point of the load to -/// the end of the block it is in. -/// -/// Finally, it is safe, but not profitable, to sink a load targetting a -/// non-address-taken alloca. Doing so will cause us to not promote the alloca -/// to a register. -static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { - BasicBlock::iterator BBI = L, E = L->getParent()->end(); - - for (++BBI; BBI != E; ++BBI) - if (BBI->mayWriteToMemory()) - return false; - - // Check for non-address taken alloca. If not address-taken already, it isn't - // profitable to do this xform. - if (AllocaInst *AI = dyn_cast(L->getOperand(0))) { - bool isAddressTaken = false; - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ++UI) { - if (isa(UI)) continue; - if (StoreInst *SI = dyn_cast(*UI)) { - // If storing TO the alloca, then the address isn't taken. - if (SI->getOperand(1) == AI) continue; - } - isAddressTaken = true; - break; - } - - if (!isAddressTaken && AI->isStaticAlloca()) - return false; - } - - // If this load is a load from a GEP with a constant offset from an alloca, - // then we don't want to sink it. In its present form, it will be - // load [constant stack offset]. Sinking it will cause us to have to - // materialize the stack addresses in each predecessor in a register only to - // do a shared load from register in the successor. - if (GetElementPtrInst *GEP = dyn_cast(L->getOperand(0))) - if (AllocaInst *AI = dyn_cast(GEP->getOperand(0))) - if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) - return false; - - return true; -} - -Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { - LoadInst *FirstLI = cast(PN.getIncomingValue(0)); - - // When processing loads, we need to propagate two bits of information to the - // sunk load: whether it is volatile, and what its alignment is. We currently - // don't sink loads when some have their alignment specified and some don't. - // visitLoadInst will propagate an alignment onto the load when TD is around, - // and if TD isn't around, we can't handle the mixed case. - bool isVolatile = FirstLI->isVolatile(); - unsigned LoadAlignment = FirstLI->getAlignment(); - - // We can't sink the load if the loaded value could be modified between the - // load and the PHI. - if (FirstLI->getParent() != PN.getIncomingBlock(0) || - !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = dyn_cast(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUse()) - return 0; - - // We can't sink the load if the loaded value could be modified between - // the load and the PHI. - if (LI->isVolatile() != isVolatile || - LI->getParent() != PN.getIncomingBlock(i) || - !isSafeAndProfitableToSinkLoad(LI)) - return 0; - - // If some of the loads have an alignment specified but not all of them, - // we can't do the transformation. - if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; - - LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstLI->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } - - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } - - // If this was a volatile load that we are merging, make sure to loop through - // and mark all the input loads as non-volatile. If we don't do this, we will - // insert a new volatile load and the old ones will not be deletable. - if (isVolatile) - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - cast(PN.getIncomingValue(i))->setVolatile(false); - - return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); -} - - - -/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" -/// operator and they all are only used by the PHI, PHI together their -/// inputs, and do the operation once, to the result of the PHI. -Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast(PN.getIncomingValue(0)); - - if (isa(FirstInst)) - return FoldPHIArgGEPIntoPHI(PN); - if (isa(FirstInst)) - return FoldPHIArgLoadIntoPHI(PN); - - // Scan the instruction, looking for input operations that can be folded away. - // If all input operands to the phi are the same instruction (e.g. a cast from - // the same type or "+42") we can pull the operation through the PHI, reducing - // code size and simplifying code. - Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; - - if (isa(FirstInst)) { - CastSrcTy = FirstInst->getOperand(0)->getType(); - - // Be careful about transforming integer PHIs. We don't want to pessimize - // the code by turning an i32 into an i1293. - if (isa(PN.getType()) && isa(CastSrcTy)) { - if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) - return 0; - } - } else if (isa(FirstInst) || isa(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, - // otherwise call FoldPHIArgBinOpIntoPHI. - ConstantOp = dyn_cast(FirstInst->getOperand(1)); - if (ConstantOp == 0) - return FoldPHIArgBinOpIntoPHI(PN); - } else { - return 0; // Cannot fold this operation. - } - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; - if (CastSrcTy) { - if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. - } else if (I->getOperand(1) != ConstantOp) { - return 0; - } - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstInst->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); + } - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } + if (I == E) + break; - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } + // Add the original argument and attributes. + NewArgs.push_back(*I); + if (Attributes Attr = Attrs.getParamAttributes(Idx)) + NewAttrs.push_back + (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); - // Insert and return the new operation. - if (CastInst *FirstCI = dyn_cast(FirstInst)) - return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); - - if (BinaryOperator *BinOp = dyn_cast(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); - - CmpInst *CIOp = cast(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - PhiVal, ConstantOp); -} + ++Idx, ++I; + } while (1); + } -/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle -/// that is dead. -static bool DeadPHICycle(PHINode *PN, - SmallPtrSet &PotentiallyDeadPHIs) { - if (PN->use_empty()) return true; - if (!PN->hasOneUse()) return false; + // Add any function attributes. + if (Attributes Attr = Attrs.getFnAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); - // Remember this node, and if we find the cycle, return. - if (!PotentiallyDeadPHIs.insert(PN)) - return true; - - // Don't scan crazily complex things. - if (PotentiallyDeadPHIs.size() == 16) - return false; + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. - if (PHINode *PU = dyn_cast(PN->use_back())) - return DeadPHICycle(PU, PotentiallyDeadPHIs); + std::vector NewTypes; + NewTypes.reserve(FTy->getNumParams()+1); - return false; -} + // Insert the chain's type into the list of parameter types, which may + // mean appending it. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); -/// PHIsEqualValue - Return true if this phi node is always equal to -/// NonPhiInVal. This happens with mutually cyclic phi nodes like: -/// z = some value; x = phi (y, z); y = phi (x, z) -static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, - SmallPtrSet &ValueEqualPHIs) { - // See if we already saw this PHI node. - if (!ValueEqualPHIs.insert(PN)) - return true; - - // Don't scan crazily complex things. - if (ValueEqualPHIs.size() == 16) - return false; - - // Scan the operands to see if they are either phi nodes or are equal to - // the value. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Op = PN->getIncomingValue(i); - if (PHINode *OpPN = dyn_cast(Op)) { - if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) - return false; - } else if (Op != NonPhiInVal) - return false; - } - - return true; -} + do { + if (Idx == NestIdx) + // Add the chain's type. + NewTypes.push_back(NestTy); + if (I == E) + break; -namespace { -struct PHIUsageRecord { - unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) - unsigned Shift; // The amount shifted. - Instruction *Inst; // The trunc instruction. - - PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) - : PHIId(pn), Shift(Sh), Inst(User) {} - - bool operator<(const PHIUsageRecord &RHS) const { - if (PHIId < RHS.PHIId) return true; - if (PHIId > RHS.PHIId) return false; - if (Shift < RHS.Shift) return true; - if (Shift > RHS.Shift) return false; - return Inst->getType()->getPrimitiveSizeInBits() < - RHS.Inst->getType()->getPrimitiveSizeInBits(); - } -}; - -struct LoweredPHIRecord { - PHINode *PN; // The PHI that was lowered. - unsigned Shift; // The amount shifted. - unsigned Width; // The width extracted. - - LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) - : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} - - // Ctor form used by DenseMap. - LoweredPHIRecord(PHINode *pn, unsigned Sh) - : PN(pn), Shift(Sh), Width(0) {} -}; -} + // Add the original type. + NewTypes.push_back(*I); -namespace llvm { - template<> - struct DenseMapInfo { - static inline LoweredPHIRecord getEmptyKey() { - return LoweredPHIRecord(0, 0); - } - static inline LoweredPHIRecord getTombstoneKey() { - return LoweredPHIRecord(0, 1); - } - static unsigned getHashValue(const LoweredPHIRecord &Val) { - return DenseMapInfo::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ - (Val.Width>>3); - } - static bool isEqual(const LoweredPHIRecord &LHS, - const LoweredPHIRecord &RHS) { - return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && - LHS.Width == RHS.Width; - } - }; - template <> - struct isPodLike { static const bool value = true; }; -} + ++Idx, ++I; + } while (1); + } + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FTy->isVarArg()); + Constant *NewCallee = + NestF->getType() == PointerType::getUnqual(NewFTy) ? + NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), + NewAttrs.end()); -/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an -/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If -/// so, we split the PHI into the various pieces being extracted. This sort of -/// thing is introduced when SROA promotes an aggregate to large integer values. -/// -/// TODO: The user of the trunc may be an bitcast to float/double/vector or an -/// inttoptr. We should produce new PHIs in the right type. -/// -Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { - // PHIUsers - Keep track of all of the truncated values extracted from a set - // of PHIs, along with their offset. These are the things we want to rewrite. - SmallVector PHIUsers; - - // PHIs are often mutually cyclic, so we keep track of a whole set of PHI - // nodes which are extracted from. PHIsToSlice is a set we use to avoid - // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to - // check the uses of (to ensure they are all extracts). - SmallVector PHIsToSlice; - SmallPtrSet PHIsInspected; - - PHIsToSlice.push_back(&FirstPhi); - PHIsInspected.insert(&FirstPhi); - - for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { - PHINode *PN = PHIsToSlice[PHIId]; - - // Scan the input list of the PHI. If any input is an invoke, and if the - // input is defined in the predecessor, then we won't be split the critical - // edge which is required to insert a truncate. Because of this, we have to - // bail out. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - InvokeInst *II = dyn_cast(PN->getIncomingValue(i)); - if (II == 0) continue; - if (II->getParent() != PN->getIncomingBlock(i)) - continue; - - // If we have a phi, and if it's directly in the predecessor, then we have - // a critical edge where we need to put the truncate. Since we can't - // split the edge in instcombine, we have to bail out. - return 0; - } - - - for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); - - // If the user is a PHI, inspect its uses recursively. - if (PHINode *UserPN = dyn_cast(User)) { - if (PHIsInspected.insert(UserPN)) - PHIsToSlice.push_back(UserPN); - continue; - } - - // Truncates are always ok. - if (isa(User)) { - PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); - continue; + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + cast(NewCaller)->setCallingConv(II->getCallingConv()); + cast(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + if (cast(Caller)->isTailCall()) + cast(NewCaller)->setTailCall(); + cast(NewCaller)-> + setCallingConv(cast(Caller)->getCallingConv()); + cast(NewCaller)->setAttributes(NewPAL); } - - // Otherwise it must be a lshr which can only be used by one trunc. - if (User->getOpcode() != Instruction::LShr || - !User->hasOneUse() || !isa(User->use_back()) || - !isa(User->getOperand(1))) - return 0; - - unsigned Shift = cast(User->getOperand(1))->getZExtValue(); - PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); + if (!Caller->getType()->isVoidTy()) + Caller->replaceAllUsesWith(NewCaller); + Caller->eraseFromParent(); + Worklist.Remove(Caller); + return 0; } } - - // If we have no users, they must be all self uses, just nuke the PHI. - if (PHIUsers.empty()) - return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); - - // If this phi node is transformable, create new PHIs for all the pieces - // extracted out of it. First, sort the users by their offset and size. - array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - - DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; - ); - - // PredValues - This is a temporary used when rewriting PHI nodes. It is - // hoisted out here to avoid construction/destruction thrashing. - DenseMap PredValues; - - // ExtractedVals - Each new PHI we introduce is saved here so we don't - // introduce redundant PHIs. - DenseMap ExtractedVals; - - for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { - unsigned PHIId = PHIUsers[UserI].PHIId; - PHINode *PN = PHIsToSlice[PHIId]; - unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); - - PHINode *EltPHI; - - // If we've already lowered a user like this, reuse the previously lowered - // value. - if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { - - // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); - assert(EltPHI->getType() != PN->getType() && - "Truncate didn't shrink phi?"); - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); - Value *&PredVal = PredValues[Pred]; - - // If we already have a value for this predecessor, reuse it. - if (PredVal) { - EltPHI->addIncoming(PredVal, Pred); - continue; - } - // Handle the PHI self-reuse case. - Value *InVal = PN->getIncomingValue(i); - if (InVal == PN) { - PredVal = EltPHI; - EltPHI->addIncoming(PredVal, Pred); - continue; - } - - if (PHINode *InPHI = dyn_cast(PN)) { - // If the incoming value was a PHI, and if it was one of the PHIs we - // already rewrote it, just use the lowered value. - if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { - PredVal = Res; - EltPHI->addIncoming(PredVal, Pred); - continue; - } - } - - // Otherwise, do an extract in the predecessor. - Builder->SetInsertPoint(Pred, Pred->getTerminator()); - Value *Res = InVal; - if (Offset) - Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset), "extract"); - Res = Builder->CreateTrunc(Res, Ty, "extract.t"); - PredVal = Res; - EltPHI->addIncoming(Res, Pred); - - // If the incoming value was a PHI, and if it was one of the PHIs we are - // rewriting, we will ultimately delete the code we inserted. This - // means we need to revisit that PHI to make sure we extract out the - // needed piece. - if (PHINode *OldInVal = dyn_cast(PN->getIncomingValue(i))) - if (PHIsInspected.count(OldInVal)) { - unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), - OldInVal)-PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, - cast(Res))); - ++UserE; - } - } - PredValues.clear(); - - DEBUG(errs() << " Made element PHI for offset " << Offset << ": " - << *EltPHI << '\n'); - ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; - } - - // Replace the use of this piece with the PHI node. - ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); - } - - // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) - // with undefs. - Value *Undef = UndefValue::get(FirstPhi.getType()); - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - ReplaceInstUsesWith(*PHIsToSlice[i], Undef); - return ReplaceInstUsesWith(FirstPhi, Undef); + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : + ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); } -// PHINode simplification -// -Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - - if (Value *V = PN.hasConstantValue()) - return ReplaceInstUsesWith(PN, V); - - // If all PHI operands are the same operation, pull them through the PHI, - // reducing code size. - if (isa(PN.getIncomingValue(0)) && - isa(PN.getIncomingValue(1)) && - cast(PN.getIncomingValue(0))->getOpcode() == - cast(PN.getIncomingValue(1))->getOpcode() && - // FIXME: The hasOneUse check will fail for PHIs that use the value more - // than themselves more than once. - PN.getIncomingValue(0)->hasOneUse()) - if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) - return Result; - - // If this is a trivial cycle in the PHI node graph, remove it. Basically, if - // this PHI only has a single use (a PHI), and if that PHI only has one use (a - // PHI)... break the cycle. - if (PN.hasOneUse()) { - Instruction *PHIUser = cast(PN.use_back()); - if (PHINode *PU = dyn_cast(PHIUser)) { - SmallPtrSet PotentiallyDeadPHIs; - PotentiallyDeadPHIs.insert(&PN); - if (DeadPHICycle(PU, PotentiallyDeadPHIs)) - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); - } - - // If this phi has a single use, and if that use just computes a value for - // the next iteration of a loop, delete the phi. This occurs with unused - // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this - // common case here is good because the only other things that catch this - // are induction variable analysis (sometimes) and ADCE, which is only run - // late. - if (PHIUser->hasOneUse() && - (isa(PHIUser) || isa(PHIUser)) && - PHIUser->use_back() == &PN) { - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); - } - } - - // We sometimes end up with phi cycles that non-obviously end up being the - // same value, for example: - // z = some value; x = phi (y, z); y = phi (x, z) - // where the phi nodes don't necessarily need to be in the same block. Do a - // quick check to see if the PHI node only contains a single non-phi value, if - // so, scan to see if the phi cycle is actually equal to that value. - { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); - // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && - isa(PN.getIncomingValue(InValNo))) - ++InValNo; - - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); - - // Scan the rest of the operands to see if there are any conflicts, if so - // there is no need to recursively scan other phis. - for (++InValNo; InValNo != NumOperandVals; ++InValNo) { - Value *OpVal = PN.getIncomingValue(InValNo); - if (OpVal != NonPhiInVal && !isa(OpVal)) - break; - } - - // If we scanned over all operands, then we have one unique value plus - // phi values. Scan PHI nodes to see if they all merge in each other or - // the value. - if (InValNo == NumOperandVals) { - SmallPtrSet ValueEqualPHIs; - if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) - return ReplaceInstUsesWith(PN, NonPhiInVal); - } - } - } - - // If there are multiple PHIs, sort their operands so that they all list - // the blocks in the same order. This will help identical PHIs be eliminated - // by other passes. Other passes shouldn't depend on this for correctness - // however. - PHINode *FirstPN = cast(PN.getParent()->begin()); - if (&PN != FirstPN) - for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *BBA = PN.getIncomingBlock(i); - BasicBlock *BBB = FirstPN->getIncomingBlock(i); - if (BBA != BBB) { - Value *VA = PN.getIncomingValue(i); - unsigned j = PN.getBasicBlockIndex(BBB); - Value *VB = PN.getIncomingValue(j); - PN.setIncomingBlock(i, BBB); - PN.setIncomingValue(i, VB); - PN.setIncomingBlock(j, BBA); - PN.setIncomingValue(j, VA); - // NOTE: Instcombine normally would want us to "return &PN" if we - // modified any of the operands of an instruction. However, since we - // aren't adding or removing uses (just rearranging them) we don't do - // this in this case. - } - } - // If this is an integer PHI and we know that it has an illegal type, see if - // it is only used by trunc or trunc(lshr) operations. If so, we split the - // PHI into the various pieces being extracted. This sort of thing is - // introduced when SROA promotes an aggregate to a single large integer type. - if (isa(PN.getType()) && TD && - !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) - if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) - return Res; - - return 0; -} Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); @@ -11956,7 +5678,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast(EmitGEPOffset(&GEP, *this)); + ConstantInt *OffsetV = cast(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); // If this GEP instruction doesn't move the pointer, just replace the GEP @@ -11985,666 +5707,60 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector NewIndices; const Type *InTy = cast(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) { + if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = cast(&GEP)->isInBounds() ? Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), NewIndices.end()) : Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), NewIndices.end()); - if (NGEP->getType() == GEP.getType()) - return ReplaceInstUsesWith(GEP, NGEP); - NGEP->takeName(&GEP); - return new BitCastInst(NGEP, GEP.getType()); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { - // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { - const Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa(AI) && "Unknown type of allocation inst!"); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa(*It) || isa(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext())); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; - Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, - New->getName()+".sub", It); - - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, V); - } else if (isa(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } - } - - if (TD && isa(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - - // If the alignment is 0 (unspecified), assign it the preferred alignment. - if (AI.getAlignment() == 0) - AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); - } - - return 0; -} - -Instruction *InstCombiner::visitFree(Instruction &FI) { - Value *Op = FI.getOperand(1); - - // free undef -> unreachable. - if (isa(Op)) { - // Insert a new store to null because we cannot modify the CFG here. - new StoreInst(ConstantInt::getTrue(FI.getContext()), - UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI); - return EraseInstFromFunction(FI); - } - - // If we have 'free null' delete the instruction. This can happen in stl code - // when lots of inlining happens. - if (isa(Op)) - return EraseInstFromFunction(FI); - - // If we have a malloc call whose only use is a free call, delete both. - if (isMalloc(Op)) { - if (CallInst* CI = extractMallocCallFromBitCast(Op)) { - if (Op->hasOneUse() && CI->hasOneUse()) { - EraseInstFromFunction(FI); - EraseInstFromFunction(*CI); - return EraseInstFromFunction(*cast(Op)); - } - } else { - // Op is a call to malloc - if (Op->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*cast(Op)); - } - } - } - - return 0; -} - -/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. -static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, - const TargetData *TD) { - User *CI = cast(LI.getOperand(0)); - Value *CastOp = CI->getOperand(0); - - const PointerType *DestTy = cast(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast(CastOp->getType())) { - - // If the address spaces don't match, don't eliminate the cast. - if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; - - const Type *SrcPTy = SrcTy->getElementType(); - - if (DestPTy->isInteger() || isa(DestPTy) || - isa(DestPTy)) { - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (const ArrayType *ASrcTy = dyn_cast(SrcPTy)) - if (Constant *CSrc = dyn_cast(CastOp)) - if (ASrcTy->getNumElements() != 0) { - Value *Idxs[2]; - Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); - Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); - SrcTy = cast(CastOp->getType()); - SrcPTy = SrcTy->getElementType(); - } - - if (IC.getTargetData() && - (SrcPTy->isInteger() || isa(SrcPTy) || - isa(SrcPTy)) && - // Do not allow turning this into a load of an integer, which is then - // casted to a pointer, this pessimizes pointer analysis a lot. - (isa(SrcPTy) == isa(LI.getType())) && - IC.getTargetData()->getTypeSizeInBits(SrcPTy) == - IC.getTargetData()->getTypeSizeInBits(DestPTy)) { - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before the load, cast - // the result of the loaded value. - Value *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); - // Now cast the result of the load. - return new BitCastInst(NewLoad, LI.getType()); - } - } - } - return 0; -} - -Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { - Value *Op = LI.getOperand(0); - - // Attempt to improve the alignment. - if (TD) { - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) - LI.setAlignment(KnownAlign); - } - - // load (cast X) --> cast (load X) iff safe. - if (isa(Op)) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; - - // Do really simple store-to-load forwarding and load CSE, to catch cases - // where there are several consequtive memory accesses to the same location, - // separated by a few arithmetic operations. - BasicBlock::iterator BBI = &LI; - if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) - return ReplaceInstUsesWith(LI, AvailableVal); - - // load(gep null, ...) -> unreachable - if (GetElementPtrInst *GEPI = dyn_cast(Op)) { - const Value *GEPI0 = GEPI->getOperand(0); - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0){ - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - } - - // load null/undef -> unreachable - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(Op) || - (isa(Op) && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - // Instcombine load (constantexpr_cast global) -> cast (load global) - if (ConstantExpr *CE = dyn_cast(Op)) - if (CE->isCast()) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - - if (Op->hasOneUse()) { - // Change select and PHI nodes to select values instead of addresses: this - // helps alias analysis out a lot, allows many others simplifications, and - // exposes redundancy in the code. - // - // Note that we cannot do the transformation unless we know that the - // introduced loads cannot trap! Something like this is valid as long as - // the condition is always false: load (select bool %C, int* null, int* %G), - // but it would not be valid if we transformed it to load from null - // unconditionally. - // - if (SelectInst *SI = dyn_cast(Op)) { - // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). - if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - Value *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); - return SelectInst::Create(SI->getCondition(), V1, V2); + if (NGEP->getType() == GEP.getType()) + return ReplaceInstUsesWith(GEP, NGEP); + NGEP->takeName(&GEP); + return new BitCastInst(NGEP, GEP.getType()); } - - // load (select (cond, null, P)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(1))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(2)); - return &LI; - } - - // load (select (cond, P, null)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(2))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(1)); - return &LI; - } } - } + } + return 0; } -/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P -/// when possible. This makes it generally easy to do alias analysis and/or -/// SROA/mem2reg of the memory object. -static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { - User *CI = cast(SI.getOperand(1)); - Value *CastOp = CI->getOperand(0); - - const Type *DestPTy = cast(CI->getType())->getElementType(); - const PointerType *SrcTy = dyn_cast(CastOp->getType()); - if (SrcTy == 0) return 0; - - const Type *SrcPTy = SrcTy->getElementType(); +Instruction *InstCombiner::visitFree(Instruction &FI) { + Value *Op = FI.getOperand(1); - if (!DestPTy->isInteger() && !isa(DestPTy)) - return 0; - - /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" - /// to its first element. This allows us to handle things like: - /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) - /// on 32-bit hosts. - SmallVector NewGEPIndices; - - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (isa(SrcPTy) || isa(SrcPTy)) { - // Index through pointer. - Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); - NewGEPIndices.push_back(Zero); - - while (1) { - if (const StructType *STy = dyn_cast(SrcPTy)) { - if (!STy->getNumElements()) /* Struct can be empty {} */ - break; - NewGEPIndices.push_back(Zero); - SrcPTy = STy->getElementType(0); - } else if (const ArrayType *ATy = dyn_cast(SrcPTy)) { - NewGEPIndices.push_back(Zero); - SrcPTy = ATy->getElementType(); - } else { - break; - } - } - - SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + // free undef -> unreachable. + if (isa(Op)) { + // Insert a new store to null because we cannot modify the CFG here. + new StoreInst(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI); + return EraseInstFromFunction(FI); } - - if (!SrcPTy->isInteger() && !isa(SrcPTy)) - return 0; - // If the pointers point into different address spaces or if they point to - // values with different sizes, we can't do the transformation. - if (!IC.getTargetData() || - SrcTy->getAddressSpace() != - cast(CI->getType())->getAddressSpace() || - IC.getTargetData()->getTypeSizeInBits(SrcPTy) != - IC.getTargetData()->getTypeSizeInBits(DestPTy)) - return 0; - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before - // the store, cast the value to be stored. - Value *NewCast; - Value *SIOp0 = SI.getOperand(0); - Instruction::CastOps opcode = Instruction::BitCast; - const Type* CastSrcTy = SIOp0->getType(); - const Type* CastDstTy = SrcPTy; - if (isa(CastDstTy)) { - if (CastSrcTy->isInteger()) - opcode = Instruction::IntToPtr; - } else if (isa(CastDstTy)) { - if (isa(SIOp0->getType())) - opcode = Instruction::PtrToInt; - } - - // SIOp0 is a pointer to aggregate and this is a store to the first field, - // emit a GEP to index into its first field. - if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()); - - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, - SIOp0->getName()+".c"); - return new StoreInst(NewCast, CastOp); -} - -/// equivalentAddressValues - Test if A and B will obviously have the same -/// value. This includes recognizing that %t0 and %t1 will have the same -/// value in code like this: -/// %t0 = getelementptr \@a, 0, 3 -/// store i32 0, i32* %t0 -/// %t1 = getelementptr \@a, 0, 3 -/// %t2 = load i32* %t1 -/// -static bool equivalentAddressValues(Value *A, Value *B) { - // Test if the values are trivially equivalent. - if (A == B) return true; - - // Test if the values come form identical arithmetic instructions. - // This uses isIdenticalToWhenDefined instead of isIdenticalTo because - // its only used to compare two uses within the same basic block, which - // means that they'll always either have the same value or one of them - // will have an undefined value. - if (isa(A) || - isa(A) || - isa(A) || - isa(A)) - if (Instruction *BI = dyn_cast(B)) - if (cast(A)->isIdenticalToWhenDefined(BI)) - return true; - - // Otherwise they may not be equivalent. - return false; -} - -// If this instruction has two uses, one of which is a llvm.dbg.declare, -// return the llvm.dbg.declare. -DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { - if (!V->hasNUses(2)) - return 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (DbgDeclareInst *DI = dyn_cast(UI)) - return DI; - if (isa(UI) && UI->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast(UI->use_begin())) - return DI; - } - } - return 0; -} - -Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { - Value *Val = SI.getOperand(0); - Value *Ptr = SI.getOperand(1); - - // If the RHS is an alloca with a single use, zapify the store, making the - // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declare's from affecting - // codegen. - if (!SI.isVolatile()) { - if (Ptr->hasOneUse()) { - if (isa(Ptr)) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { - if (isa(GEP->getOperand(0))) { - if (GEP->getOperand(0)->hasOneUse()) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { - EraseInstFromFunction(*DI); - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - } - } - } - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { - EraseInstFromFunction(*DI); - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - } + // If we have 'free null' delete the instruction. This can happen in stl code + // when lots of inlining happens. + if (isa(Op)) + return EraseInstFromFunction(FI); - // Attempt to improve the alignment. - if (TD) { - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); - if (KnownAlign > - (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : - SI.getAlignment())) - SI.setAlignment(KnownAlign); - } - - // Do really simple DSE, to catch cases where there are several consecutive - // stores to the same location, separated by a few arithmetic operations. This - // situation often occurs with bitfield accesses. - BasicBlock::iterator BBI = &SI; - for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; - --ScanInsts) { - --BBI; - // Don't count debug info directives, lest they affect codegen, - // and we skip pointer-to-pointer bitcasts, which are NOPs. - // It is necessary for correctness to skip those that feed into a - // llvm.dbg.declare, as these are not present when debugging is off. - if (isa(BBI) || - (isa(BBI) && isa(BBI->getType()))) { - ScanInsts++; - continue; - } - - if (StoreInst *PrevSI = dyn_cast(BBI)) { - // Prev store isn't volatile, and stores to the same location? - if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { - ++NumDeadStore; - ++BBI; - EraseInstFromFunction(*PrevSI); - continue; + // If we have a malloc call whose only use is a free call, delete both. + if (isMalloc(Op)) { + if (CallInst* CI = extractMallocCallFromBitCast(Op)) { + if (Op->hasOneUse() && CI->hasOneUse()) { + EraseInstFromFunction(FI); + EraseInstFromFunction(*CI); + return EraseInstFromFunction(*cast(Op)); } - break; - } - - // If this is a load, we have to stop. However, if the loaded value is from - // the pointer we're loading and is producing the pointer we're storing, - // then *this* store is dead (X = load P; store X -> P). - if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; + } else { + // Op is a call to malloc + if (Op->hasOneUse()) { + EraseInstFromFunction(FI); + return EraseInstFromFunction(*cast(Op)); } - // Otherwise, this is a load from some other location. Stores before it - // may not be dead. - break; - } - - // Don't skip over loads or things that can modify memory. - if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) - break; - } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. - - // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa(Ptr) && SI.getPointerAddressSpace() == 0) { - if (!isa(Val)) { - SI.setOperand(0, UndefValue::get(Val->getType())); - if (Instruction *U = dyn_cast(Val)) - Worklist.Add(U); // Dropped a use. - ++NumCombined; } - return 0; // Do not modify these! - } - - // store undef, Ptr -> noop - if (isa(Val)) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; } - // If the pointer destination is a cast, see if we can fold the cast into the - // source instead. - if (isa(Ptr)) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->isCast()) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - - - // If this store is the last instruction in the basic block (possibly - // excepting debug info instructions and the pointer bitcasts that feed - // into them), and if the block ends with an unconditional branch, try - // to move it to the successor block. - BBI = &SI; - do { - ++BBI; - } while (isa(BBI) || - (isa(BBI) && isa(BBI->getType()))); - if (BranchInst *BI = dyn_cast(BBI)) - if (BI->isUnconditional()) - if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! - return 0; } -/// SimplifyStoreAtEndOfBlock - Turn things like: -/// if () { *P = v1; } else { *P = v2 } -/// into a phi node with a store in the successor. -/// -/// Simplify things like: -/// *P = v1; if () { *P = v2; } -/// into a phi node with a store in the successor. -/// -bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { - BasicBlock *StoreBB = SI.getParent(); - - // Check to see if the successor block has exactly two incoming edges. If - // so, see if the other predecessor contains a store to the same location. - // if so, insert a PHI node (if needed) and move the stores down. - BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - - // Determine whether Dest has exactly two predecessors and, if so, compute - // the other predecessor. - pred_iterator PI = pred_begin(DestBB); - BasicBlock *OtherBB = 0; - if (*PI != StoreBB) - OtherBB = *PI; - ++PI; - if (PI == pred_end(DestBB)) - return false; - - if (*PI != StoreBB) { - if (OtherBB) - return false; - OtherBB = *PI; - } - if (++PI != pred_end(DestBB)) - return false; - - // Bail out if all the relevant blocks aren't distinct (this can happen, - // for example, if SI is in an infinite loop) - if (StoreBB == DestBB || OtherBB == DestBB) - return false; - - // Verify that the other block ends in a branch and is not otherwise empty. - BasicBlock::iterator BBI = OtherBB->getTerminator(); - BranchInst *OtherBr = dyn_cast(BBI); - if (!OtherBr || BBI == OtherBB->begin()) - return false; - - // If the other block ends in an unconditional branch, check for the 'if then - // else' case. there is an instruction before the branch. - StoreInst *OtherStore = 0; - if (OtherBr->isUnconditional()) { - --BBI; - // Skip over debugging info. - while (isa(BBI) || - (isa(BBI) && isa(BBI->getType()))) { - if (BBI==OtherBB->begin()) - return false; - --BBI; - } - // If this isn't a store, isn't a store to the same location, or if the - // alignments differ, bail out. - OtherStore = dyn_cast(BBI); - if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) - return false; - } else { - // Otherwise, the other block ended with a conditional branch. If one of the - // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && - OtherBr->getSuccessor(1) != StoreBB) - return false; - - // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an - // if/then triangle. See if there is a store to the same ptr as SI that - // lives in OtherBB. - for (;; --BBI) { - // Check to see if we find the matching store. - if ((OtherStore = dyn_cast(BBI))) { - if (OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) - return false; - break; - } - // If we find something that may be using or overwriting the stored - // value, or if we run out of instructions, we can't do the xform. - if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || - BBI == OtherBB->begin()) - return false; - } - - // In order to eliminate the store in OtherBr, we have to - // make sure nothing reads or overwrites the stored value in - // StoreBB. - for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { - // FIXME: This should really be AA driven. - if (I->mayReadFromMemory() || I->mayWriteToMemory()) - return false; - } - } - - // Insert a PHI node now if we need it. - Value *MergedVal = OtherStore->getOperand(0); - if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); - PN->reserveOperandSpace(2); - PN->addIncoming(SI.getOperand(0), SI.getParent()); - PN->addIncoming(OtherStore->getOperand(0), OtherBB); - MergedVal = InsertNewInstBefore(PN, DestBB->front()); - } - - // Advance to a place where it is safe to insert the new store and - // insert it. - BBI = DestBB->getFirstNonPHI(); - InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile(), - SI.getAlignment()), *BBI); - - // Nuke the old stores. - EraseInstFromFunction(SI); - EraseInstFromFunction(*OtherStore); - ++NumCombined; - return true; -} Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { @@ -12840,546 +5956,6 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return 0; } -/// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. -static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa(V)) - return true; - if (ConstantVector *C = dyn_cast(V)) { - if (isConstant) return true; - // If all elts are the same, we can extract. - Constant *Op0 = C->getOperand(0); - for (unsigned i = 1; i < C->getNumOperands(); ++i) - if (C->getOperand(i) != Op0) - return false; - return true; - } - Instruction *I = dyn_cast(V); - if (!I) return false; - - // Insert element gets simplified to the inserted element or is deleted if - // this is constant idx extract element and its a constant idx insertelt. - if (I->getOpcode() == Instruction::InsertElement && isConstant && - isa(I->getOperand(2))) - return true; - if (I->getOpcode() == Instruction::Load && I->hasOneUse()) - return true; - if (BinaryOperator *BO = dyn_cast(I)) - if (BO->hasOneUse() && - (CheapToScalarize(BO->getOperand(0), isConstant) || - CheapToScalarize(BO->getOperand(1), isConstant))) - return true; - if (CmpInst *CI = dyn_cast(I)) - if (CI->hasOneUse() && - (CheapToScalarize(CI->getOperand(0), isConstant) || - CheapToScalarize(CI->getOperand(1), isConstant))) - return true; - - return false; -} - -/// Read and decode a shufflevector mask. -/// -/// It turns undef elements into values that are larger than the number of -/// elements in the input. -static std::vector getShuffleMask(const ShuffleVectorInst *SVI) { - unsigned NElts = SVI->getType()->getNumElements(); - if (isa(SVI->getOperand(2))) - return std::vector(NElts, 0); - if (isa(SVI->getOperand(2))) - return std::vector(NElts, 2*NElts); - - std::vector Result; - const ConstantVector *CP = cast(SVI->getOperand(2)); - for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) - if (isa(*i)) - Result.push_back(NElts*2); // undef -> 8 - else - Result.push_back(cast(*i)->getZExtValue()); - return Result; -} - -/// FindScalarElement - Given a vector and an element number, see if the scalar -/// value is already around as a register, for example if it were inserted then -/// extracted from the vector. -static Value *FindScalarElement(Value *V, unsigned EltNo) { - assert(isa(V->getType()) && "Not looking at a vector?"); - const VectorType *PTy = cast(V->getType()); - unsigned Width = PTy->getNumElements(); - if (EltNo >= Width) // Out of range access. - return UndefValue::get(PTy->getElementType()); - - if (isa(V)) - return UndefValue::get(PTy->getElementType()); - else if (isa(V)) - return Constant::getNullValue(PTy->getElementType()); - else if (ConstantVector *CP = dyn_cast(V)) - return CP->getOperand(EltNo); - else if (InsertElementInst *III = dyn_cast(V)) { - // If this is an insert to a variable element, we don't know what it is. - if (!isa(III->getOperand(2))) - return 0; - unsigned IIElt = cast(III->getOperand(2))->getZExtValue(); - - // If this is an insert to the element we are looking for, return the - // inserted value. - if (EltNo == IIElt) - return III->getOperand(1); - - // Otherwise, the insertelement doesn't modify the value, recurse on its - // vector input. - return FindScalarElement(III->getOperand(0), EltNo); - } else if (ShuffleVectorInst *SVI = dyn_cast(V)) { - unsigned LHSWidth = - cast(SVI->getOperand(0)->getType())->getNumElements(); - unsigned InEl = getShuffleMask(SVI)[EltNo]; - if (InEl < LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - else if (InEl < LHSWidth*2) - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - else - return UndefValue::get(PTy->getElementType()); - } - - // Otherwise, we don't know. - return 0; -} - -Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - // If vector val is undef, replace extract with scalar undef. - if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - - // If vector val is constant 0, replace extract with scalar 0. - if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - - if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { - // If vector val is constant with all elements the same, replace EI with - // that element. When the elements are not identical, we cannot replace yet - // (we do that below, but only when the index is constant). - Constant *op0 = C->getOperand(0); - for (unsigned i = 1; i != C->getNumOperands(); ++i) - if (C->getOperand(i) != op0) { - op0 = 0; - break; - } - if (op0) - return ReplaceInstUsesWith(EI, op0); - } - - // If extracting a specified index from the vector, see if we can recursively - // find a previously computed scalar that was inserted into the vector. - if (ConstantInt *IdxC = dyn_cast(EI.getOperand(1))) { - unsigned IndexVal = IdxC->getZExtValue(); - unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - - // If this is extracting an invalid index, turn this into undef, to avoid - // crashing the code below. - if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - - // This instruction only demands the single element from the input vector. - // If the input vector has a single use, simplify it based on this use - // property. - if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { - APInt UndefElts(VectorWidth, 0); - APInt DemandedMask(VectorWidth, 1 << IndexVal); - if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), - DemandedMask, UndefElts)) { - EI.setOperand(0, V); - return &EI; - } - } - - if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) - return ReplaceInstUsesWith(EI, Elt); - - // If the this extractelement is directly using a bitcast from a vector of - // the same number of elements, see if we can find the source element from - // it. In this case, we will end up needing to bitcast the scalars. - if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { - if (const VectorType *VT = - dyn_cast(BCI->getOperand(0)->getType())) - if (VT->getNumElements() == VectorWidth) - if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) - return new BitCastInst(Elt, EI.getType()); - } - } - - if (Instruction *I = dyn_cast(EI.getOperand(0))) { - // Push extractelement into predecessor operation if legal and - // profitable to do so - if (BinaryOperator *BO = dyn_cast(I)) { - if (I->hasOneUse() && - CheapToScalarize(BO, isa(EI.getOperand(1)))) { - Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); - Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); - return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); - } - } else if (InsertElementInst *IE = dyn_cast(I)) { - // Extracting the inserted element? - if (IE->getOperand(2) == EI.getOperand(1)) - return ReplaceInstUsesWith(EI, IE->getOperand(1)); - // If the inserted and extracted elements are constants, they must not - // be the same value, extract from the pre-inserted value instead. - if (isa(IE->getOperand(2)) && isa(EI.getOperand(1))) { - Worklist.AddValue(EI.getOperand(0)); - EI.setOperand(0, IE->getOperand(0)); - return &EI; - } - } else if (ShuffleVectorInst *SVI = dyn_cast(I)) { - // If this is extracting an element from a shufflevector, figure out where - // it came from and extract from the appropriate input element instead. - if (ConstantInt *Elt = dyn_cast(EI.getOperand(1))) { - unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; - Value *Src; - unsigned LHSWidth = - cast(SVI->getOperand(0)->getType())->getNumElements(); - - if (SrcIdx < LHSWidth) - Src = SVI->getOperand(0); - else if (SrcIdx < LHSWidth*2) { - SrcIdx -= LHSWidth; - Src = SVI->getOperand(1); - } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - } - return ExtractElementInst::Create(Src, - ConstantInt::get(Type::getInt32Ty(EI.getContext()), - SrcIdx, false)); - } - } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) - } - return 0; -} - -/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. -/// Otherwise, return false. -static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, - std::vector &Mask) { - assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && - "Invalid CollectSingleShuffleElements"); - unsigned NumElts = cast(V->getType())->getNumElements(); - - if (isa(V)) { - Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); - return true; - } - - if (V == LHS) { - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); - return true; - } - - if (V == RHS) { - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), - i+NumElts)); - return true; - } - - if (InsertElementInst *IEI = dyn_cast(V)) { - // If this is an insert of an extract from some other vector, include it. - Value *VecOp = IEI->getOperand(0); - Value *ScalarOp = IEI->getOperand(1); - Value *IdxOp = IEI->getOperand(2); - - if (!isa(IdxOp)) - return false; - unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); - - if (isa(ScalarOp)) { // inserting undef into vector. - // Okay, we can handle this if the vector we are insertinting into is - // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { - // If so, update the mask to reflect the inserted undef. - Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); - return true; - } - } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){ - if (isa(EI->getOperand(1)) && - EI->getOperand(0)->getType() == V->getType()) { - unsigned ExtractedIdx = - cast(EI->getOperand(1))->getZExtValue(); - - // This must be extracting from either LHS or RHS. - if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { - // Okay, we can handle this if the vector we are insertinting into is - // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { - // If so, update the mask to reflect the inserted value. - if (EI->getOperand(0) == LHS) { - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - ExtractedIdx); - } else { - assert(EI->getOperand(0) == RHS); - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - ExtractedIdx+NumElts); - - } - return true; - } - } - } - } - } - // TODO: Handle shufflevector here! - - return false; -} - -/// CollectShuffleElements - We are building a shuffle of V, using RHS as the -/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask -/// that computes V and the LHS value of the shuffle. -static Value *CollectShuffleElements(Value *V, std::vector &Mask, - Value *&RHS) { - assert(isa(V->getType()) && - (RHS == 0 || V->getType() == RHS->getType()) && - "Invalid shuffle!"); - unsigned NumElts = cast(V->getType())->getNumElements(); - - if (isa(V)) { - Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); - return V; - } else if (isa(V)) { - Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); - return V; - } else if (InsertElementInst *IEI = dyn_cast(V)) { - // If this is an insert of an extract from some other vector, include it. - Value *VecOp = IEI->getOperand(0); - Value *ScalarOp = IEI->getOperand(1); - Value *IdxOp = IEI->getOperand(2); - - if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { - if (isa(EI->getOperand(1)) && isa(IdxOp) && - EI->getOperand(0)->getType() == V->getType()) { - unsigned ExtractedIdx = - cast(EI->getOperand(1))->getZExtValue(); - unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); - - // Either the extracted from or inserted into vector must be RHSVec, - // otherwise we'd end up with a shuffle of three inputs. - if (EI->getOperand(0) == RHS || RHS == 0) { - RHS = EI->getOperand(0); - Value *V = CollectShuffleElements(VecOp, Mask, RHS); - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+ExtractedIdx); - return V; - } - - if (VecOp == RHS) { - Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); - // Everything but the extracted element is replaced with the RHS. - for (unsigned i = 0; i != NumElts; ++i) { - if (i != InsertedIdx) - Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+i); - } - return V; - } - - // If this insertelement is a chain that comes from exactly these two - // vectors, return the vector and the effective shuffle. - if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) - return EI->getOperand(0); - } - } - } - // TODO: Handle shufflevector here! - - // Otherwise, can't do anything fancy. Return an identity vector. - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); - return V; -} - -Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { - Value *VecOp = IE.getOperand(0); - Value *ScalarOp = IE.getOperand(1); - Value *IdxOp = IE.getOperand(2); - - // Inserting an undef or into an undefined place, remove this. - if (isa(ScalarOp) || isa(IdxOp)) - ReplaceInstUsesWith(IE, VecOp); - - // If the inserted element was extracted from some other vector, and if the - // indexes are constant, try to turn this into a shufflevector operation. - if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { - if (isa(EI->getOperand(1)) && isa(IdxOp) && - EI->getOperand(0)->getType() == IE.getType()) { - unsigned NumVectorElts = IE.getType()->getNumElements(); - unsigned ExtractedIdx = - cast(EI->getOperand(1))->getZExtValue(); - unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); - - if (ExtractedIdx >= NumVectorElts) // Out of range extract. - return ReplaceInstUsesWith(IE, VecOp); - - if (InsertedIdx >= NumVectorElts) // Out of range insert. - return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); - - // If we are extracting a value from a vector, then inserting it right - // back into the same place, just use the input vector. - if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) - return ReplaceInstUsesWith(IE, VecOp); - - // If this insertelement isn't used by some other insertelement, turn it - // (and any insertelements it points to), into one big shuffle. - if (!IE.hasOneUse() || !isa(IE.use_back())) { - std::vector Mask; - Value *RHS = 0; - Value *LHS = CollectShuffleElements(&IE, Mask, RHS); - if (RHS == 0) RHS = UndefValue::get(LHS->getType()); - // We now have a shuffle of LHS, RHS, Mask. - return new ShuffleVectorInst(LHS, RHS, - ConstantVector::get(Mask)); - } - } - } - - unsigned VWidth = cast(VecOp->getType())->getNumElements(); - APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) - return &IE; - - return 0; -} - - -Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - Value *LHS = SVI.getOperand(0); - Value *RHS = SVI.getOperand(1); - std::vector Mask = getShuffleMask(&SVI); - - bool MadeChange = false; - - // Undefined shuffle mask -> undefined value. - if (isa(SVI.getOperand(2))) - return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - - unsigned VWidth = cast(SVI.getType())->getNumElements(); - - if (VWidth != cast(LHS->getType())->getNumElements()) - return 0; - - APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { - LHS = SVI.getOperand(0); - RHS = SVI.getOperand(1); - MadeChange = true; - } - - // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') - // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). - if (LHS == RHS || isa(LHS)) { - if (isa(LHS) && LHS == RHS) { - // shuffle(undef,undef,mask) -> undef. - return ReplaceInstUsesWith(SVI, LHS); - } - - // Remap any references to RHS to use LHS. - std::vector Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= 2*e) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else { - if ((Mask[i] >= e && isa(RHS)) || - (Mask[i] < e && isa(LHS))) { - Mask[i] = 2*e; // Turn into undef. - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - } else { - Mask[i] = Mask[i] % e; // Force to LHS. - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - } - } - } - SVI.setOperand(0, SVI.getOperand(1)); - SVI.setOperand(1, UndefValue::get(RHS->getType())); - SVI.setOperand(2, ConstantVector::get(Elts)); - LHS = SVI.getOperand(0); - RHS = SVI.getOperand(1); - MadeChange = true; - } - - // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= e*2) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } - - // Eliminate identity shuffles. - if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); - if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - - // If the LHS is a shufflevector itself, see if we can combine it with this - // one without producing an unusual shuffle. Here we are really conservative: - // we are absolutely afraid of producing a shuffle mask not in the input - // program, because the code gen may not be smart enough to turn a merged - // shuffle into two specific shuffles: it may produce worse code. As such, - // we only merge two shuffles if the result is one of the two input shuffle - // masks. In this case, merging the shuffles just removes one instruction, - // which we know is safe. This is good for things like turning: - // (splat(splat)) -> splat. - if (ShuffleVectorInst *LHSSVI = dyn_cast(LHS)) { - if (isa(RHS)) { - std::vector LHSMask = getShuffleMask(LHSSVI); - - if (LHSMask.size() == Mask.size()) { - std::vector NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); - - // If the result mask is equal to the src shuffle or this - // shuffle mask, do the replacement. - if (NewMask == LHSMask || NewMask == Mask) { - unsigned LHSInNElts = - cast(LHSSVI->getOperand(0)->getType())-> - getNumElements(); - std::vector Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get( - Type::getInt32Ty(SVI.getContext()))); - } else { - Elts.push_back(ConstantInt::get( - Type::getInt32Ty(SVI.getContext()), - NewMask[i])); - } - } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); - } - } - } - } - - return MadeChange ? &SVI : 0; -} - @@ -13410,7 +5986,6 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - CopyPrecedingStopPoint(I, InsertPos); I->moveBefore(InsertPos); ++NumSunkInst; return true;