//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/MathExtras.h"
TargetLowering::TargetLowering(TargetMachine &tm)
: TM(tm), TD(TM.getTargetData()) {
- assert(ISD::BUILTIN_OP_END <= 128 &&
+ assert(ISD::BUILTIN_OP_END <= 156 &&
"Fixed size array in TargetLowering is not large enough!");
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
- IsLittleEndian = TD.isLittleEndian();
- ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD.getIntPtrType());
+ IsLittleEndian = TD->isLittleEndian();
+ ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType());
ShiftAmtHandling = Undefined;
memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, sizeof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
allowUnalignedMemoryAccesses = false;
UseUnderscoreSetJmpLongJmp = false;
// Set MVT::Vector to always be Expanded
SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType,
ValueTypeActions);
+
+ // Loop over all of the legal vector value types, specifying an identity type
+ // transformation.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ if (isTypeLegal((MVT::ValueType)i))
+ TransformToType[i] = (MVT::ValueType)i;
+ }
assert(isTypeLegal(MVT::f64) && "Target does not support FP?");
TransformToType[MVT::f64] = MVT::f64;
}
+/// getPackedTypeBreakdown - Packed types are broken down into some number of
+/// legal scalar types. For example, <8 x float> maps to 2 MVT::v4f32 values
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+///
+/// This method returns the number and type of the resultant breakdown.
+///
+unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy,
+ MVT::ValueType &PTyElementVT,
+ MVT::ValueType &PTyLegalElementVT) const {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = PTy->getNumElements();
+ MVT::ValueType EltTy = getValueType(PTy->getElementType());
+
+ unsigned NumVectorRegs = 1;
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
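+ // For example, with <8 x float> on a target whose widest legal vector is
+ // v4f32, the loop halves NumElts 8 -> 4 and doubles NumVectorRegs to 2.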
+
+ MVT::ValueType VT;
+ if (NumElts == 1) {
+ VT = EltTy;
+ } else {
+ VT = getVectorType(EltTy, NumElts);
+ }
+ PTyElementVT = VT;
+
+ MVT::ValueType DestVT = getTypeToTransformTo(VT);
+ PTyLegalElementVT = DestVT;
+ if (DestVT < VT) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT));
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op,
uint64_t Demanded) {
- // FIXME: ISD::SELECT
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
switch(Op.getOpcode()) {
default: break;
case ISD::Constant:
// We know all of the bits for a constant!
KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
KnownZero = ~KnownOne & DemandedMask;
- return false;
+ return false; // Don't fall through, will infinitely loop.
case ISD::AND:
- // If either the LHS or the RHS are Zero, the result is zero.
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t LHSZero, LHSOne;
+ ComputeMaskedBits(Op.getOperand(0), DemandedMask,
+ LHSZero, LHSOne, Depth+1);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
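+ // e.g. if the LHS is known zero in 0xF0 and the RHS constant is 0xFF, the
+ // constant can shrink to 0x0F.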
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))
+ return true;
+ }
+
if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- // If something is known zero on the RHS, the bits aren't demanded on the
- // LHS.
if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
// If the RHS is a constant, see if we can simplify it.
if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))
return true;
-
+
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
return true;
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+ return true;
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
uint64_t TypeMask = MVT::getIntVTBitMask(VT);
- if (SimplifyDemandedBits(Op.getOperand(0),
- (DemandedMask << ShAmt) & TypeMask,
+ uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (HighBits & DemandedMask)
+ InDemandedMask |= MVT::getIntVTSignBit(VT);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- // Sign or Zero extension. Compute the bits in the result that are not
+ // Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- uint64_t NotIn = ~MVT::getIntVTBitMask(EVT);
- uint64_t NewBits = MVT::getIntVTBitMask(VT) & NotIn;
+ uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;
- // Sign extension.
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
uint64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);
- // If any of the sign extended bits are demanded, we know that the sign
+ // Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
- if (NewBits & DemandedMask)
- InputDemandedBits |= InSignBit;
+ InputDemandedBits |= InSignBit;
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
KnownZero, KnownOne, TLO, Depth+1))
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- // If the input sign bit is known zero, or if the NewBits are not demanded
- // convert this into a zero extension.
- if ((KnownZero & InSignBit) || (NewBits & ~DemandedMask) == NewBits) {
- return TLO.CombineTo(Op, Op.getOperand(0));
- } else if (KnownOne & InSignBit) { // Input sign bit known set
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero & InSignBit)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));
+
+ if (KnownOne & InSignBit) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
- } else { // Input sign bit unknown
+ } else { // Input sign bit unknown
KnownZero &= ~NewBits;
KnownOne &= ~NewBits;
}
break;
}
- case ISD::ADD:
- if (ConstantSDNode *AA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero,
- KnownOne, TLO, Depth+1))
- return true;
- // Compute the KnownOne/KnownZero masks for the constant, so we can set
- // KnownZero appropriately if we're adding a constant that has all low
- // bits cleared.
- ComputeMaskedBits(Op.getOperand(1),
- MVT::getIntVTBitMask(Op.getValueType()),
- KnownZero2, KnownOne2, Depth+1);
-
- uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero),
- CountTrailingZeros_64(~KnownZero2));
- KnownZero = (1ULL << KnownZeroOut) - 1;
- KnownOne = 0;
- }
- break;
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP: {
KnownOne = 0;
break;
}
+ case ISD::ZEXTLOAD: {
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
+ KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+
+ // If none of the top bits are demanded, convert this into an any_extend.
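+ // (The top bits of an any_extend are undefined, which is harmless here
+ // because no demanded bit depends on them.)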
+ uint64_t NewBits = (~InMask) & DemandedMask;
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT::ValueType InVT = Op.getOperand(0).getValueType();
+ uint64_t InMask = MVT::getIntVTBitMask(InVT);
+ uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
+ uint64_t NewBits = (~InMask) & DemandedMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ uint64_t InDemandedBits = DemandedMask & InMask;
+ InDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero & InSignBit)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne & InSignBit) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).Val->hasOneUse()) {
+ SDOperand In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+ uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
+ HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
+ HighBits >>= ShAmt->getValue();
+
+ if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
+ (DemandedMask & HighBits) == 0) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
+ NewTrunc, In.getOperand(1)));
+ }
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+ KnownZero &= OutMask;
+ KnownOne &= OutMask;
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ uint64_t InMask = MVT::getIntVTBitMask(VT);
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~InMask & DemandedMask;
+ break;
}
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Just use ComputeMaskedBits to compute output bits.
+ ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
return false;
}
KnownOne <<= SA->getValue();
KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero.
}
- break;
+ return;
case ISD::SRL:
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
HighBits <<= MVT::getSizeInBits(Op.getValueType())-SA->getValue();
Mask <<= SA->getValue();
ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero >>= SA->getValue();
KnownOne >>= SA->getValue();
KnownZero |= HighBits; // high bits known zero.
}
- break;
+ return;
case ISD::SRA:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t HighBits = (1ULL << SA->getValue())-1;
KnownOne |= HighBits;
}
}
- break;
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
+
+ uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+ uint64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (NewBits)
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero & InSignBit) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne & InSignBit) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP: {
return;
}
case ISD::ZEXTLOAD: {
- unsigned SrcBits =
- MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT());
- KnownZero |= ~((1ULL << SrcBits)-1);
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
+ KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
return;
}
case ISD::ZERO_EXTEND: {
- unsigned SrcBits =
- MVT::getSizeInBits(Op.getOperand(0).getValueType());
- KnownZero |= ~((1ULL << SrcBits)-1);
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+ uint64_t NewBits = (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= NewBits & Mask;
+ KnownOne &= ~NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT::ValueType InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = MVT::getSizeInBits(InVT);
+ uint64_t InMask = MVT::getIntVTBitMask(InVT);
+ uint64_t InSignBit = 1ULL << (InBits-1);
+ uint64_t NewBits = (~InMask) & Mask;
+ uint64_t InDemandedBits = Mask & InMask;
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (NewBits & Mask)
+ InDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, Depth+1);
+ // If the sign bit is known zero or one, the top bits match.
+ if (KnownZero & InSignBit) {
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne & InSignBit) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
return;
}
case ISD::ANY_EXTEND: {
- unsigned SrcBits =
- MVT::getSizeInBits(Op.getOperand(0).getValueType());
- KnownZero &= ((1ULL << SrcBits)-1);
- KnownOne &= ((1ULL << SrcBits)-1);
+ MVT::ValueType VT = Op.getOperand(0).getValueType();
+ ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
+ KnownZero, KnownOne, Depth+1);
return;
}
+ case ISD::TRUNCATE: {
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+ KnownZero &= OutMask;
+ KnownOne &= OutMask;
+ return;
+ }
case ISD::AssertZext: {
- unsigned SrcBits =
- MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
- KnownZero |= ~((1ULL << SrcBits)-1);
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ uint64_t InMask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
return;
}
case ISD::ADD: {
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
// Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS;
+ // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
+ // low 3 bits clear.
uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero),
CountTrailingZeros_64(~KnownZero2));
KnownOne = 0;
return;
}
- case ISD::SUB:
+ case ISD::SUB: {
+ ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ if (!CLHS) return;
+
// We know that the top bits of C-X are clear if X contains less bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
- break;
+ MVT::ValueType VT = CLHS->getValueType(0);
+ if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear
+ unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
+ uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
+ MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the output
+ // top bits are zero, because we now know that the output is from [0-C].
+ if ((KnownZero & MaskV) == MaskV) {
+ unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
+ KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero.
+ KnownOne = 0; // No one bits known.
+ } else {
+ KnownZero = KnownOne = 0; // Otherwise, nothing known.
+ }
+ }
+ return;
+ }
default:
// Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
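+ // Note that these case labels jump into the body of the 'if' above, so the
+ // intrinsic opcodes reach the target hook without rechecking BUILTIN_OP_END.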
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne);
- break;
+ }
+ return;
}
}
uint64_t &KnownZero,
uint64_t &KnownOne,
unsigned Depth) const {
- assert(Op.getOpcode() >= ISD::BUILTIN_OP_END &&
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
KnownZero = 0;
KnownOne = 0;
}
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{
+ MVT::ValueType VT = Op.getValueType();
+ assert(MVT::isInteger(VT) && "Invalid VT!");
+ unsigned VTBits = MVT::getSizeInBits(VT);
+ unsigned Tmp, Tmp2;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ return VTBits-Tmp;
+
+ case ISD::SEXTLOAD: // e.g. i16 -> i32: 17 bits known
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT());
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // e.g. i16 -> i32: 16 bits known
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT());
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
+ // If negative, invert the bits, then look at it.
+ if (Val & MVT::getIntVTSignBit(VT))
+ Val = ~Val;
+
+ // Shift the bits so they are the leading bits in the int64_t.
+ Val <<= 64-VTBits;
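+ // e.g. i32 7 = 0x00000007: after the shift, CLZ sees 29 leading zeros,
+ // i.e. 29 bits that match the (zero) sign bit.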
+
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::min(VTBits, CountLeadingZeros_64(Val));
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
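+ // e.g. if X has 1 known sign bit, (sra X, 4) has 5.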
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
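+ // e.g. sext i8 to i32 has 25 sign bits; shifting left by 4 leaves 21.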
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getValue() >= VTBits || // Bad shift.
+ C->getValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve at least the smaller operand's sign-bit count.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SELECT:
+ // The result is operand 1 or 2; operand 0 is only the condition.
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
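+ // e.g. if both operands have at least 16 sign bits, the result has at
+ // least 15.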
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero|1) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero & MVT::getIntVTSignBit(VT))
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->getValue() == 0) {
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero|1) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero & MVT::getIntVTSignBit(VT))
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) return NumBits;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ uint64_t SignBit = MVT::getIntVTSignBit(VT);
+ if (KnownZero & SignBit) { // SignBit is 0
+ Mask = KnownZero;
+ } else if (KnownOne & SignBit) { // SignBit is 1
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return 1;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask ^= ~0ULL;
+ Mask <<= 64-VTBits;
+ // Return # leading zeros. We use 'min' here in case Mask was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::min(VTBits, CountLeadingZeros_64(Mask));
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+
+SDOperand TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDOperand();
+}
+
//===----------------------------------------------------------------------===//
// Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//
switch (ConstraintLetter) {
default: return C_Unknown;
case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': // Relocatable Constant
case 'M':
case 'N':
case 'O':
- case 'P': return C_Other;
+ case 'P':
+ return C_Other;
}
}
std::vector<unsigned> TargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint) const {
- // Not a physreg, must not be a register reference or something.
- if (Constraint[0] != '{') return std::vector<unsigned>();
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
std::string RegName(Constraint.begin()+1, Constraint.end()-1);
-
- // Scan to see if this constraint is a register name.
+
+ // Figure out which register class contains this reg.
const MRegisterInfo *RI = TM.getRegisterInfo();
- for (unsigned i = 1, e = RI->getNumRegs(); i != e; ++i) {
- if (const char *Name = RI->get(i).Name)
- if (StringsEqualNoCase(RegName, Name))
- return std::vector<unsigned>(1, i);
+ for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).Name))
+ return std::make_pair(*I, RC);
+ }
}
- // Unknown physreg.
- return std::vector<unsigned>();
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
}
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressImmediate - Return true if the integer value or
+/// GlobalValue can be used as the offset of the target addressing mode.
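+/// For example, a target whose memory operands take a signed 16-bit
+/// displacement would return true only for V in [-32768, 32767].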
+bool TargetLowering::isLegalAddressImmediate(int64_t V) const {
+ return false;
+}
+bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
+ return false;
+}