Change Intrinsic::getDeclaration and friends to take an ArrayRef.

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineAndOrXor.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

index e9d72a4153e024e412a9abf010491d9197147ef1..64ea36fb1e9dd2bdf1ee4a221335305c1de50151 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@
  #include "InstCombine.h"
  #include "llvm/Intrinsics.h"
  #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/ConstantRange.h"
  #include "llvm/Support/PatternMatch.h"
  using namespace llvm;
  using namespace PatternMatch;
@@ -172,7 +173,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
    case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
    case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
    case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
-  case 7: return ConstantInt::getTrue(LHS->getContext());
+  case 7: 
+    if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+    Pred = FCmpInst::FCMP_ORD; break;
    }
    return Builder->CreateFCmp(Pred, LHS, RHS);
  }
@@ -272,10 +275,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
      ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
                                         AndRHS->getValue() & ShlMask);
  
-    if (CI->getValue() == ShlMask) { 
-    // Masking out bits that the shift already masks
+    if (CI->getValue() == ShlMask)
+      // Masking out bits that the shift already masks.
        return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
-    } else if (CI != AndRHS) {                  // Reducing bits set in and.
+    
+    if (CI != AndRHS) {                  // Reducing bits set in and.
        TheAnd.setOperand(1, CI);
        return &TheAnd;
      }
@@ -292,10 +296,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
      ConstantInt *CI = ConstantInt::get(Op->getContext(),
                                         AndRHS->getValue() & ShrMask);
  
-    if (CI->getValue() == ShrMask) {   
-    // Masking out bits that the shift already masks.
+    if (CI->getValue() == ShrMask)
+      // Masking out bits that the shift already masks.
        return ReplaceInstUsesWith(TheAnd, Op);
-    } else if (CI != AndRHS) {
+    
+    if (CI != AndRHS) {
        TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
        return &TheAnd;
      }
@@ -326,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
  
  
  /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi).  In pratice, we emit the more efficient
+/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
  /// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
  /// whether to treat the V, Lo and HI as signed or not. IB is the location to
  /// insert new instructions.
@@ -701,9 +706,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
      // whole construct
      if (!MCst->isZero())
        return 0;
-    Value* newOr1 = Builder->CreateOr(B, D);
-    Value* newOr2 = ConstantExpr::getOr(CCst, ECst);
-    Value* newAnd = Builder->CreateAnd(A, newOr1);
+    Value *newOr1 = Builder->CreateOr(B, D);
+    Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+    Value *newAnd = Builder->CreateAnd(A, newOr1);
      return Builder->CreateICmp(NEWCC, newAnd, newOr2);
    }
    return 0;
@@ -727,12 +732,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
      }
    }
  
-  {
-    // handle (roughly):
-    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
-    Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder);
-    if (fold) return fold;
-  }
+  // handle (roughly):  (icmp eq (A & B), C) & (icmp eq (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+    return V;
    
    // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
    Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -754,6 +756,54 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
        Value *NewOr = Builder->CreateOr(Val, Val2);
        return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
      }
+
+    // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
+    if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+      Value *NewAnd = Builder->CreateAnd(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+    }
+
+    // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
+    if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+      Value *NewOr = Builder->CreateOr(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+    }
+  }
+
+  // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+  // where CMAX is the all ones value for the truncated type,
+  // iff the lower bits of C2 and CA are zero.
+  if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+      LHS->hasOneUse() && RHS->hasOneUse()) {
+    Value *V;
+    ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+    // (and x, CA) == C2 & (trunc x) == C1
+    if (match(Val2, m_Trunc(m_Value(V))) &&
+        match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+      SmallCst = RHSCst;
+      BigCst = LHSCst;
+    }
+    // (trunc x) == C1 & (and x, CA) == C2
+    else if (match(Val, m_Trunc(m_Value(V))) &&
+             match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+      SmallCst = LHSCst;
+      BigCst = RHSCst;
+    }
+
+    if (SmallCst && BigCst) {
+      unsigned BigBitSize = BigCst->getType()->getBitWidth();
+      unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+      // Check that the low bits are zero.
+      APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+      if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+        Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+        APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+        Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+        return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+      }
+    }
    }
    
    // From here on, we only handle:
@@ -766,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
        LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
        RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
      return 0;
-  
+
+  // Make a constant range that's the intersection of the two icmp ranges.
+  // If the intersection is empty, we know that the result is false.
+  ConstantRange LHSRange = 
+    ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+  ConstantRange RHSRange = 
+    ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+  if (LHSRange.intersectWith(RHSRange).isEmptySet())
+    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
    // We can't fold (ugt x, C) & (sgt x, C2).
    if (!PredicatesFoldable(LHSCC, RHSCC))
      return 0;
@@ -799,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
    case ICmpInst::ICMP_EQ:
      switch (RHSCC) {
      default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
-    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
-    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
      case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
      case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
      case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
@@ -850,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
    case ICmpInst::ICMP_SLT:
      switch (RHSCC) {
      default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
-    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
      case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
        break;
      case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
@@ -984,8 +1037,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
    if (Value *V = SimplifyAndInst(Op0, Op1, TD))
      return ReplaceInstUsesWith(I, V);
  
-  if (Instruction *NV = SimplifyDistributed(I)) // (A|B)&(A|C) -> A|(B&C)
-    return NV;
+  // (A|B)&(A|C) -> A|(B&C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
  
    // See if we can simplify any instructions used by the instruction whose sole 
    // purpose is to compute bits we don't care about.
@@ -994,7 +1048,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
  
    if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
      const APInt &AndRHSMask = AndRHS->getValue();
-    APInt NotAndRHS(~AndRHSMask);
  
      // Optimize a variety of ((val OP C1) & C2) combinations...
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
@@ -1003,10 +1056,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
        switch (Op0I->getOpcode()) {
        default: break;
        case Instruction::Xor:
-      case Instruction::Or:
+      case Instruction::Or: {
          // If the mask is only needed on one incoming arm, push it up.
          if (!Op0I->hasOneUse()) break;
            
+        APInt NotAndRHS(~AndRHSMask);
          if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
            // Not masking anything out for the LHS, move to RHS.
            Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
@@ -1022,6 +1076,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
          }
  
          break;
+      }
        case Instruction::Add:
          // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
          // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
@@ -1041,14 +1096,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
  
          // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
          // has 1's for all bits that the subtraction with A might affect.
-        if (Op0I->hasOneUse()) {
+        if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
            uint32_t BitWidth = AndRHSMask.getBitWidth();
            uint32_t Zeros = AndRHSMask.countLeadingZeros();
            APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
  
-          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
-          if (!(A && A->isZero()) &&               // avoid infinite recursion.
-              MaskedValueIsZero(Op0LHS, Mask)) {
+          if (MaskedValueIsZero(Op0LHS, Mask)) {
              Value *NewNeg = Builder->CreateNeg(Op0RHS);
              return BinaryOperator::CreateAnd(NewNeg, AndRHS);
            }
@@ -1066,39 +1119,25 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
          }
          break;
        }
-
+          
        if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
          if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
            return Res;
-    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
-      // If this is an integer truncation or change from signed-to-unsigned, and
-      // if the source is an and/or with immediate, transform it.  This
-      // frequently occurs for bitfield accesses.
-      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
-        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
-            CastOp->getNumOperands() == 2)
-          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
-            if (CastOp->getOpcode() == Instruction::And) {
-              // Change: and (cast (and X, C1) to T), C2
-              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
-              // This will fold the two constants together, which may allow 
-              // other simplifications.
-              Value *NewCast = Builder->CreateTruncOrBitCast(
-                CastOp->getOperand(0), I.getType(), 
-                CastOp->getName()+".shrunk");
-              // trunc_or_bitcast(C1)&C2
-              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-              C3 = ConstantExpr::getAnd(C3, AndRHS);
-              return BinaryOperator::CreateAnd(NewCast, C3);
-            } else if (CastOp->getOpcode() == Instruction::Or) {
-              // Change: and (cast (or X, C1) to T), C2
-              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
-              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
-                // trunc(C1)&C2
-                return ReplaceInstUsesWith(I, AndRHS);
-            }
-          }
+    }
+    
+    // If this is an integer truncation, and if the source is an 'and' with
+    // immediate, transform it.  This frequently occurs for bitfield accesses.
+    {
+      Value *X = 0; ConstantInt *YC = 0;
+      if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+        // Change: and (trunc (and X, YC) to T), C2
+        // into  : and (trunc X to T), trunc(YC) & C2
+        // This will fold the two constants together, which may allow 
+        // other simplifications.
+        Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+        Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+        C3 = ConstantExpr::getAnd(C3, AndRHS);
+        return BinaryOperator::CreateAnd(NewCast, C3);
        }
      }
  
@@ -1120,7 +1159,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                                        I.getName()+".demorgan");
          return BinaryOperator::CreateNot(Or);
        }
-
+  
    {
      Value *A = 0, *B = 0, *C = 0, *D = 0;
      // (A|B) & ~(A&B) -> A^B
@@ -1153,7 +1192,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
          cast<BinaryOperator>(Op1)->swapOperands();
          std::swap(A, B);
        }
-      if (A == Op0)                                // A&(A^B) -> A & ~B
+      // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+      // A is originally -1 (or a vector of -1 and undefs), then we enter
+      // an endless loop. By checking that A is non-constant we ensure that
+      // we will never get to the loop.
+      if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
          return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
      }
  
@@ -1357,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
  /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
  /// If so, insert the new bswap intrinsic and return it.
  Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
    if (!ITy || ITy->getBitWidth() % 16 || 
        // ByteMask only allows up to 32-byte values.
        ITy->getBitWidth() > 32*8) 
@@ -1381,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
    for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
      if (ByteValues[i] != V)
        return 0;
-  const Type *Tys[] = { ITy };
    Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
    return CallInst::Create(F, V);
  }
  
@@ -1429,13 +1471,11 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
        return getICmpValue(isSigned, Code, Op0, Op1, Builder);
      }
    }
-  
-  {
-    // handle (roughly):
-    // (icmp ne (A & B), C) | (icmp ne (A & D), E)
-    Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder);
-    if (fold) return fold;
-  }
+
+  // handle (roughly):
+  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+    return V;
  
    // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
    Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -1449,8 +1489,29 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
        Value *NewOr = Builder->CreateOr(Val, Val2);
        return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
      }
+
+    // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
+    if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+      Value *NewOr = Builder->CreateOr(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+    }
+
+    // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
+    if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+      Value *NewAnd = Builder->CreateAnd(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+    }
    }
-  
+
+  // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+  //   iff C2 + CA == C1.
+  if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+    ConstantInt *AddCst;
+    if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+      if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+        return Builder->CreateICmpULE(Val, LHSCst);
+  }
+
    // From here on, we only handle:
    //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
    if (Val != Val2) return 0;
@@ -1695,8 +1756,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    if (Value *V = SimplifyOrInst(Op0, Op1, TD))
      return ReplaceInstUsesWith(I, V);
  
-  if (Instruction *NV = SimplifyDistributed(I)) // (A&B)|(A&C) -> A&(B|C)
-    return NV;
+  // (A&B)|(A&C) -> A&(B|C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
  
    // See if we can simplify any instructions used by the instruction whose sole 
    // purpose is to compute bits we don't care about.
@@ -1744,8 +1806,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    // (A >> B) | (C << D)  and  (A << B) | (B >> C)  -> bswap if possible.
    if (match(Op0, m_Or(m_Value(), m_Value())) ||
        match(Op1, m_Or(m_Value(), m_Value())) ||
-      (match(Op0, m_Shift(m_Value(), m_Value())) &&
-       match(Op1, m_Shift(m_Value(), m_Value())))) {
+      (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+       match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
      if (Instruction *BSwap = MatchBSwap(I))
        return BSwap;
    }
@@ -1898,6 +1960,39 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
          return BinaryOperator::CreateNot(And);
        }
  
+  // Canonicalize xor to the RHS.
+  if (match(Op0, m_Xor(m_Value(), m_Value())))
+    std::swap(Op0, Op1);
+
+  // A | ( A ^ B) -> A |  B
+  // A | (~A ^ B) -> A | ~B
+  if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+    if (Op0 == A || Op0 == B)
+      return BinaryOperator::CreateOr(A, B);
+
+    if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+      Value *Not = Builder->CreateNot(B, B->getName()+".not");
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+    if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+      Value *Not = Builder->CreateNot(A, A->getName()+".not");
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+  }
+
+  // A | ~(A | B) -> A | ~B
+  // A | ~(A ^ B) -> A | ~B
+  if (match(Op1, m_Not(m_Value(A))))
+    if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+      if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+          Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+                               B->getOpcode() == Instruction::Xor)) {
+        Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+                                                 B->getOperand(0);
+        Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+        return BinaryOperator::CreateOr(Not, Op0);
+      }
+
    if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
      if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
        if (Value *Res = FoldOrOfICmps(LHS, RHS))
@@ -1911,39 +2006,46 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    
    // fold (or (cast A), (cast B)) -> (cast (or A, B))
    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() &&
-            SrcTy->isIntOrIntVectorTy()) {
-          Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-
-          if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
-              // Only do this if the casts both really cause code to be
-              // generated.
-              ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
-              ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
-            Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
-            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
-          }
-          
-          // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
-          // cast is otherwise not optimizable.  This happens for vector sexts.
-          if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
-            if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
-              if (Value *Res = FoldOrOfICmps(LHS, RHS))
-                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-          
-          // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
-          // cast is otherwise not optimizable.  This happens for vector sexts.
-          if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
-            if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
-              if (Value *Res = FoldOrOfFCmps(LHS, RHS))
-                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+    CastInst *Op1C = dyn_cast<CastInst>(Op1);
+    if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVectorTy()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+            // Only do this if the casts both really cause code to be
+            // generated.
+            ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
          }
+        
+        // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Value *Res = FoldOrOfICmps(LHS, RHS))
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+        
+        // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
        }
+    }
    }
-  
+
+  // or(sext(A), B) -> A ? -1 : B where A is an i1
+  // or(A, sext(B)) -> B ? -1 : A where B is an i1
+  if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+    return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+  if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+    return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
    // Note: If we've gotten to the point of visiting the outer OR, then the
    // inner one couldn't be simplified.  If it was a constant, then it won't
    // be simplified by a later pass either, so we try swapping the inner/outer
@@ -1966,8 +2068,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
    if (Value *V = SimplifyXorInst(Op0, Op1, TD))
      return ReplaceInstUsesWith(I, V);
  
-  if (Instruction *NV = SimplifyDistributed(I)) // (A&B)^(A&C) -> A&(B^C)
-    return NV;
+  // (A&B)^(A&C) -> A&(B^C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
  
    // See if we can simplify any instructions used by the instruction whose sole 
    // purpose is to compute bits we don't care about.