(i32 sext_in_reg (i32 aext (i16 x)), i16) -> (i32 sext x). No known test case until...

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 0447e3b828e05fc9e7bbea4d9da8c515bab07f66..9ba9bb5e38d0dac4cde7e98930b99de4872c96ad 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,6 +129,7 @@ namespace {
      bool CombineToPreIndexedLoadStore(SDNode *N);
      bool CombineToPostIndexedLoadStore(SDNode *N);
  
+    SDValue PromoteIntBinOp(SDValue Op);
  
      /// combine - call the node-specific routine that knows how to fold each
      /// particular type of node. If that doesn't do anything, try the
@@ -254,24 +255,28 @@ namespace {
      /// looking for a better chain (aliasing node.)
      SDValue FindBetterChain(SDNode *N, SDValue Chain);
  
-    /// getShiftAmountTy - Returns a type large enough to hold any valid
-    /// shift amount - before type legalization these can be huge.
-    EVT getShiftAmountTy() {
-      return LegalTypes ?  TLI.getShiftAmountTy() : TLI.getPointerTy();
-    }
-
-public:
+  public:
      DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
-      : DAG(D),
-        TLI(D.getTargetLoweringInfo()),
-        Level(Unrestricted),
-        OptLevel(OL),
-        LegalOperations(false),
-        LegalTypes(false),
-        AA(A) {}
+      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
  
      /// Run - runs the dag combiner on all nodes in the work list
      void Run(CombineLevel AtLevel);
+    
+    SelectionDAG &getDAG() const { return DAG; }  // For file-local helpers.
+    
+    /// getShiftAmountTy - Returns the type used for shift amounts: the target's
+    /// shift type once types are legal, else pointer type (amounts can be huge).
+    EVT getShiftAmountTy() {
+      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+    }
+    
+    /// isTypeLegal - Returns true if VT can be used at this point: always
+    /// before type legalization, otherwise only if the target says VT is legal.
+    bool isTypeLegal(const EVT &VT) {
+      if (!LegalTypes) return true;
+      return TLI.isTypeLegal(VT);
+    }
    };
  }
  
@@ -629,6 +634,46 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    return true;
  }
  
+static SDValue PromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG) {
+  unsigned Opc = ISD::ZERO_EXTEND;
+  if (Op.getOpcode() == ISD::Constant) {
+    // Sign extend byte-sized constants; zero extend all else (i1 constants,
+    // non-constants).  In theory the choice shouldn't matter, but this tends
+    // to give better code?  See DAGTypeLegalizer::PromoteIntRes_Constant.
+    if (Op.getValueType().isByteSized())
+      Opc = ISD::SIGN_EXTEND;
+  }
+  return DAG.getNode(Opc, Op.getDebugLoc(), PVT, Op);
+}
+
+/// PromoteIntBinOp - If the target says promoting this integer binary op is
+/// beneficial (e.g. i16 -> i32 on x86, since i16 instructions are longer),
+/// return trunc(op(ext(lhs), ext(rhs))) in the wider type; else a null SDValue.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  EVT PVT = VT;
+  if (TLI.PerformDAGCombinePromotion(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+
+    SDValue N0 = PromoteOperand(Op.getOperand(0), PVT, DAG);
+    AddToWorkList(N0.getNode());
+
+    SDValue N1 = PromoteOperand(Op.getOperand(1), PVT, DAG);
+    AddToWorkList(N1.getNode());
+
+    DebugLoc dl = Op.getDebugLoc();
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Op.getOpcode(), dl, PVT, N0, N1));
+  }
+  return SDValue();
+}
+
  //===----------------------------------------------------------------------===//
  //  Main DAG Combiner implementation
  //===----------------------------------------------------------------------===//
@@ -1064,7 +1109,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (VT.isInteger() && !VT.isVector()) {
      APInt LHSZero, LHSOne;
      APInt RHSZero, RHSOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
      DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
  
      if (LHSZero.getBoolValue()) {
@@ -1108,7 +1153,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                                         N0.getOperand(0).getOperand(1),
                                         N0.getOperand(1)));
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  SDValue DAGCombiner::visitADDC(SDNode *N) {
@@ -1136,7 +1181,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
-  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
  
    if (LHSZero.getBoolValue()) {
@@ -1246,7 +1291,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
                                   VT);
      }
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  SDValue DAGCombiner::visitMUL(SDNode *N) {
@@ -1339,7 +1384,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
    if (RMUL.getNode() != 0)
      return RMUL;
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  SDValue DAGCombiner::visitSDIV(SDNode *N) {
@@ -1758,7 +1803,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    EVT VT = N1.getValueType();
-  unsigned BitWidth = VT.getSizeInBits();
+  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1786,7 +1831,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
    if (RAND.getNode() != 0)
      return RAND;
-  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+  // fold (and (or x, C), D) -> D if (C & D) == D
    if (N1C && N0.getOpcode() == ISD::OR)
      if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
        if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
@@ -1872,9 +1917,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      EVT MemVT = LN0->getMemoryVT();
      // If we zero all the possible extended bits, then we can turn this into
      // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
      if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
          ((!LegalOperations && !LN0->isVolatile()) ||
           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
@@ -1895,9 +1940,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      EVT MemVT = LN0->getMemoryVT();
      // If we zero all the possible extended bits, then we can turn this into
      // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
      if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
          ((!LegalOperations && !LN0->isVolatile()) ||
           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
@@ -1983,7 +2028,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      }
    }
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  SDValue DAGCombiner::visitOR(SDNode *N) {
@@ -2025,13 +2070,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    if (ROR.getNode() != 0)
      return ROR;
    // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  // iff (c1 & c2) != 0.
    if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
               isa<ConstantSDNode>(N0.getOperand(1))) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                       DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
-                                   N0.getOperand(0), N1),
-                       DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                                     N0.getOperand(0), N1),
+                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
    }
    // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
    if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -2107,7 +2154,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
      return SDValue(Rot, 0);
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
@@ -2416,7 +2463,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
        SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
  
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
  }
  
  /// visitShiftByConstant - Handle transforms common to the three shifts, when
@@ -2729,6 +2776,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
+  
+  // fold (srl (shl x, c), c) -> (and x, cst2)
+  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+      N0.getValueSizeInBits() <= 64) {
+    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(~0ULL >> ShAmt, VT));
+  }
+  
  
    // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -2754,7 +2810,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N1C && N0.getOpcode() == ISD::CTLZ &&
        N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
      APInt KnownZero, KnownOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
      DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
  
      // If any of the input bits are KnownOne, then the input couldn't be all
@@ -3622,7 +3678,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    // Do not generate loads of non-round integer types since these can
    // be expensive (and would be wrong if the type is not byte sized).
    if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
-      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -3692,7 +3748,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    // if x is small enough.
    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
      SDValue N00 = N0.getOperand(0);
-    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
+    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
    }
  
@@ -3777,7 +3834,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    if (N0.getOpcode() == ISD::TRUNCATE)
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
    // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
-  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
        N0.getOpcode() == ISD::ANY_EXTEND) {
      if (N0.getOperand(0).getValueType().bitsLT(VT))
        // if the source is smaller than the dest, we still need an extend
@@ -3947,7 +4005,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
        VT.isInteger() && !VT.isVector()) {
      unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
      EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
-    if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+    if (isTypeLegal(IntXVT)) {
        SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
                                IntXVT, N0.getOperand(1));
        AddToWorkList(X.getNode());
@@ -4073,8 +4131,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
          if (Op.getOpcode() == ISD::UNDEF) continue;
          EltIsUndef = false;
  
-        NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
-                    zextOrTrunc(SrcBitSize).zext(DstBitSize));
+        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
        }
  
        if (EltIsUndef)
@@ -4462,7 +4520,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  
    // fold (fp_round_inreg c1fp) -> c1fp
-  if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+  if (N0CFP && isTypeLegal(EVT)) {
      SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
      return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
    }
@@ -4603,7 +4661,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  
    SDNode *Trunc = 0;
    if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
-    // Look pass truncate.
+    // Look past truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }
@@ -4698,7 +4756,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
            Equal = true;
          }
  
-      EVT SetCCVT = N1.getValueType();
+      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
+      
+      EVT SetCCVT = NodeToReplace.getValueType();
        if (LegalTypes)
          SetCCVT = TLI.getSetCCResultType(SetCCVT);
        SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -4707,9 +4767,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                                     Equal ? ISD::SETEQ : ISD::SETNE);
        // Replace the uses of XOR with SETCC
        WorkListRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
-      removeFromWorkList(N1.getNode());
-      DAG.DeleteNode(N1.getNode());
+      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
+      removeFromWorkList(NodeToReplace.getNode());
+      DAG.DeleteNode(NodeToReplace.getNode());
        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                           MVT::Other, Chain, SetCC, N2);
      }
@@ -5018,18 +5078,6 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
    SDValue Chain = LD->getChain();
    SDValue Ptr   = LD->getBasePtr();
  
-  // Try to infer better alignment information than the load already has.
-  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > LD->getAlignment())
-        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
-                              LD->getValueType(0),
-                              Chain, Ptr, LD->getSrcValue(),
-                              LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), LD->isNonTemporal(), Align);
-    }
-  }
-
    // If load is not volatile and there are no uses of the loaded value (and
    // the updated indexed value in case of indexed loads), change uses of the
    // chain value into uses of the chain input (i.e. delete the dead load).
@@ -5095,6 +5143,18 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
      }
    }
  
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
    if (CombinerAA) {
      // Walk up chain skipping non-aliasing memory nodes.
      SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5141,6 +5201,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
    return SDValue();
  }
  
+/// CheckForMaskedLoad - Check to see if V is (and (load Ptr), imm) where the
+/// mask clears a contiguous, aligned run of 1, 2 or 4 bytes.  If so, return
+/// {bytes cleared, byte offset of the run from bit 0}; otherwise return {0, 0}.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+  
+  // Check for the structure we're looking for.
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+  
+  // Check the chain and pointer.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
+  
+  // The store should be chained directly to the load or be an operand of a
+  // tokenfactor.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() != ISD::TokenFactor)
+    return Result; // Fail.
+  else {
+    bool isOk = false;
+    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+      if (Chain->getOperand(i).getNode() == LD) {
+        isOk = true;
+        break;
+      }
+    if (!isOk) return Result;
+  }
+  
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask.  Invert it so that the bits being masked out are
+  // 1 and the bits being kept are 0.  Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+  
+  // See if we have a continuous run of bits.  If so, we have 0*1+0*
+  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+  
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1: 
+  case 2: 
+  case 4: break;
+  default: return Result; // All one mask, or 5-byte mask.
+  }
+  
+  // Verify that the first bit starts at a multiple of mask so that the access
+  // is aligned the same as the access width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+  
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal only provides the
+/// bytes described by MaskInfo (from CheckForMaskedLoad).  If so, build and
+/// return a narrower store of the shifted, truncated IVal; otherwise return 0.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+                                SDValue IVal, StoreSDNode *St,
+                                DAGCombiner *DC) {
+  unsigned NumBytes = MaskInfo.first;
+  unsigned ByteShift = MaskInfo.second;
+  SelectionDAG &DAG = DC->getDAG();
+  
+  // Check to see if IVal is known zero in every bit outside the bytes being
+  // replaced.  If not, the 'or' also changes other bytes: not a replacement.
+  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+                                  ByteShift*8, (ByteShift+NumBytes)*8);
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+  
+  // Check that it is legal on the target to do this.  It is legal if the new
+  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+  // legalization.
+  MVT VT = MVT::getIntegerVT(NumBytes*8);
+  if (!DC->isTypeLegal(VT))
+    return 0;
+  
+  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
+  // shifted by ByteShift and truncated down to NumBytes.
+  if (ByteShift)
+    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+
+  // Figure out the offset for the store and the alignment of the access.
+  unsigned StOffset;
+  unsigned NewAlign = St->getAlignment();
+
+  if (DAG.getTargetLoweringInfo().isLittleEndian())
+    StOffset = ByteShift;
+  else
+    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+  
+  SDValue Ptr = St->getBasePtr();
+  if (StOffset) {
+    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+    NewAlign = MinAlign(NewAlign, StOffset);
+  }
+  
+  // Truncate down to the new size.
+  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+  
+  ++OpsNarrowed;
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, 
+                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+                      false, false, NewAlign).getNode();
+}
+
  
  /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
  /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
@@ -5160,6 +5350,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
      return SDValue();
  
    unsigned Opc = Value.getOpcode();
+  
+  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+  // is a byte mask indicating a consecutive number of bytes, check to see if
+  // Y is known to provide just those bytes.  If so, we try to replace the
+  // load + replace + store sequence with a single (narrower) store, which makes
+  // the load dead.
+  if (Opc == ISD::OR) {
+    std::pair<unsigned, unsigned> MaskedLoad;
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(1), ST,this))
+        return SDValue(NewST, 0);
+                                           
+    // Or is commutative, so try swapping X and Y.
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(0), ST,this))
+        return SDValue(NewST, 0);
+  }
+  
    if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
        Value.getOperand(1).getOpcode() != ISD::Constant)
      return SDValue();
@@ -5207,8 +5419,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
          PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
  
        unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
-      if (NewAlign <
-          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+      const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
          return SDValue();
  
        SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -5246,17 +5458,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
    SDValue Value = ST->getValue();
    SDValue Ptr   = ST->getBasePtr();
  
-  // Try to infer better alignment information than the store already has.
-  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > ST->getAlignment())
-        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), ST->isNonTemporal(), Align);
-    }
-  }
-
    // If this is a store of a bit convert, store the input value if the
    // resultant store does not need a higher alignment than the original.
    if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
@@ -5289,8 +5490,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
        case MVT::ppcf128:
          break;
        case MVT::f32:
-        if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
-             !ST->isVolatile()) ||
+        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
              TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
            Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
@@ -5301,7 +5501,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
          }
          break;
        case MVT::f64:
-        if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
               !ST->isVolatile()) ||
              TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
            Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -5347,6 +5547,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      }
    }
  
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getSrcValue(),
+                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+    }
+  }
+
    if (CombinerAA) {
      // Walk up chain skipping non-aliasing memory nodes.
      SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5407,7 +5618,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      if (SimplifyDemandedBits(Value,
                               APInt::getLowBitsSet(
                                 Value.getValueType().getScalarType().getSizeInBits(),
-                               ST->getMemoryVT().getSizeInBits())))
+                               ST->getMemoryVT().getScalarType().getSizeInBits())))
        return SDValue(N, 0);
    }
  
@@ -5547,7 +5758,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
          InVec = InVec.getOperand(0);
        if (ISD::isNormalLoad(InVec.getNode())) {
          LN0 = cast<LoadSDNode>(InVec);
-        Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        }
      }
  
@@ -5655,7 +5866,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      }
  
      // Add count and size info.
-    if (!TLI.isTypeLegal(VT) && LegalTypes)
+    if (!isTypeLegal(VT))
        return SDValue();
  
      // Return the new VECTOR_SHUFFLE node.
@@ -6283,7 +6494,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  /// FindBaseOffset - Return true if base is a frame index, which is known not
  // to alias with anything but itself.  Provides base object and offset as results.
  static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
-                           GlobalValue *&GV, void *&CV) {
+                           const GlobalValue *&GV, void *&CV) {
    // Assume it is a primitive operation.
    Base = Ptr; Offset = 0; GV = 0; CV = 0;
  
@@ -6331,7 +6542,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
    // Gather base node and offset information.
    SDValue Base1, Base2;
    int64_t Offset1, Offset2;
-  GlobalValue *GV1, *GV2;
+  const GlobalValue *GV1, *GV2;
    void *CV1, *CV2;
    bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
    bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);