Add another debug statement.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index a40c429f5701ffa4be7a6669aa029df2b0e2cf7c..2ac9655dd0562cf68580a585b6bfe92eb92b4e64 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -14,8 +14,8 @@
  #define DEBUG_TYPE "isel"
  #include "SDNodeDbgValue.h"
  #include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
  #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/ConstantFolding.h"
@@ -32,6 +32,7 @@
  #include "llvm/Module.h"
  #include "llvm/CodeGen/Analysis.h"
  #include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
  #include "llvm/CodeGen/GCStrategy.h"
  #include "llvm/CodeGen/GCMetadata.h"
  #include "llvm/CodeGen/MachineFunction.h"
@@ -40,17 +41,15 @@
  #include "llvm/CodeGen/MachineJumpTableInfo.h"
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
  #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
@@ -70,22 +69,45 @@ LimitFPPrecision("limit-float-precision",
                   cl::location(LimitFloatPrecision),
                   cl::init(0));
  
+// Limit the width of DAG chains. This is important in general to prevent
+// prevent DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      EVT PartVT, EVT ValueVT);
+
  /// getCopyFromParts - Create a value that contains the specified legal parts
  /// combined into the value they represent.  If the parts combine to a type
  /// larger then ValueVT then AssertOp can be used to specify whether the extra
  /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
  /// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
                                  const SDValue *Parts,
                                  unsigned NumParts, EVT PartVT, EVT ValueVT,
                                  ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  if (ValueVT.isVector())
+    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
    assert(NumParts > 0 && "No parts to assemble!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDValue Val = Parts[0];
  
    if (NumParts > 1) {
      // Assemble the value from multiple parts.
-    if (!ValueVT.isVector() && ValueVT.isInteger()) {
+    if (ValueVT.isInteger()) {
        unsigned PartBits = PartVT.getSizeInBits();
        unsigned ValueBits = ValueVT.getSizeInBits();
  
@@ -100,25 +122,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
  
        if (RoundParts > 2) {
-        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
+        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                                PartVT, HalfVT);
-        Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
+        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                                RoundParts / 2, PartVT, HalfVT);
        } else {
-        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
-        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
        }
  
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
  
-      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
  
        if (RoundParts < NumParts) {
          // Assemble the trailing non-power-of-2 part.
          unsigned OddParts = NumParts - RoundParts;
          EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
-        Hi = getCopyFromParts(DAG, dl,
+        Hi = getCopyFromParts(DAG, DL,
                                Parts + RoundParts, OddParts, PartVT, OddVT);
  
          // Combine the round and odd parts.
@@ -126,68 +148,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
          if (TLI.isBigEndian())
            std::swap(Lo, Hi);
          EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
-        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                           DAG.getConstant(Lo.getValueType().getSizeInBits(),
                                           TLI.getPointerTy()));
-        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
-        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
-      }
-    } else if (ValueVT.isVector()) {
-      // Handle a multi-element vector.
-      EVT IntermediateVT, RegisterVT;
-      unsigned NumIntermediates;
-      unsigned NumRegs =
-        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
-                                   NumIntermediates, RegisterVT);
-      assert(NumRegs == NumParts
-             && "Part count doesn't match vector breakdown!");
-      NumParts = NumRegs; // Silence a compiler warning.
-      assert(RegisterVT == PartVT
-             && "Part type doesn't match vector breakdown!");
-      assert(RegisterVT == Parts[0].getValueType() &&
-             "Part type doesn't match part!");
-
-      // Assemble the parts into intermediate operands.
-      SmallVector<SDValue, 8> Ops(NumIntermediates);
-      if (NumIntermediates == NumParts) {
-        // If the register was not expanded, truncate or copy the value,
-        // as appropriate.
-        for (unsigned i = 0; i != NumParts; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
-                                    PartVT, IntermediateVT);
-      } else if (NumParts > 0) {
-        // If the intermediate type was expanded, build the intermediate
-        // operands from the parts.
-        assert(NumParts % NumIntermediates == 0 &&
-               "Must expand into a divisible number of parts!");
-        unsigned Factor = NumParts / NumIntermediates;
-        for (unsigned i = 0; i != NumIntermediates; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
-                                    PartVT, IntermediateVT);
+        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
        }
-
-      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
-      // intermediate operands.
-      Val = DAG.getNode(IntermediateVT.isVector() ?
-                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
-                        ValueVT, &Ops[0], NumIntermediates);
      } else if (PartVT.isFloatingPoint()) {
        // FP split into multiple FP parts (for ppcf128)
        assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
               "Unexpected split");
        SDValue Lo, Hi;
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
-      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
      } else {
        // FP split into integer parts (soft fp)
        assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
               !PartVT.isVector() && "Unexpected split");
        EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
      }
    }
  
@@ -197,182 +180,319 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
    if (PartVT == ValueVT)
      return Val;
  
-  if (PartVT.isVector()) {
-    assert(ValueVT.isVector() && "Unknown vector conversion!");
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
-  }
-
-  if (ValueVT.isVector()) {
-    assert(ValueVT.getVectorElementType() == PartVT &&
-           ValueVT.getVectorNumElements() == 1 &&
-           "Only trivial scalar-to-vector conversions should get here!");
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
-  }
-
-  if (PartVT.isInteger() &&
-      ValueVT.isInteger()) {
+  if (PartVT.isInteger() && ValueVT.isInteger()) {
      if (ValueVT.bitsLT(PartVT)) {
        // For a truncate, see if we have any information to
        // indicate whether the truncated bits will always be
        // zero or sign-extension.
        if (AssertOp != ISD::DELETED_NODE)
-        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+        Val = DAG.getNode(AssertOp, DL, PartVT, Val,
                            DAG.getValueType(ValueVT));
-      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-    } else {
-      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
      }
+    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
    }
  
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
-    if (ValueVT.bitsLT(Val.getValueType())) {
-      // FP_ROUND's are always exact here.
-      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
-                         DAG.getIntPtrConstant(1));
-    }
+    // FP_ROUND's are always exact here.
+    if (ValueVT.bitsLT(Val.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+                         DAG.getTargetConstant(1, TLI.getPointerTy()));
  
-    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
    }
  
    if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
  
    llvm_unreachable("Unknown mismatch!");
-  return SDValue();
  }
  
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent.  If the parts combine to a type
+/// larger then ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      EVT PartVT, EVT ValueVT) {
+  assert(ValueVT.isVector() && "Not a vector value");
+  assert(NumParts > 0 && "No parts to assemble!");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  // Handle a multi-element vector.
+  if (NumParts > 1) {
+    EVT IntermediateVT, RegisterVT;
+    unsigned NumIntermediates;
+    unsigned NumRegs =
+    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                               NumIntermediates, RegisterVT);
+    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+    NumParts = NumRegs; // Silence a compiler warning.
+    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+    assert(RegisterVT == Parts[0].getValueType() &&
+           "Part type doesn't match part!");
+
+    // Assemble the parts into intermediate operands.
+    SmallVector<SDValue, 8> Ops(NumIntermediates);
+    if (NumIntermediates == NumParts) {
+      // If the register was not expanded, truncate or copy the value,
+      // as appropriate.
+      for (unsigned i = 0; i != NumParts; ++i)
+        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+                                  PartVT, IntermediateVT);
+    } else if (NumParts > 0) {
+      // If the intermediate type was expanded, build the intermediate
+      // operands from the parts.
+      assert(NumParts % NumIntermediates == 0 &&
+             "Must expand into a divisible number of parts!");
+      unsigned Factor = NumParts / NumIntermediates;
+      for (unsigned i = 0; i != NumIntermediates; ++i)
+        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+                                  PartVT, IntermediateVT);
+    }
+
+    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+    // intermediate operands.
+    Val = DAG.getNode(IntermediateVT.isVector() ?
+                      ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+                      ValueVT, &Ops[0], NumIntermediates);
+  }
+
+  // There is now one part, held in Val.  Correct it to match ValueVT.
+  PartVT = Val.getValueType();
+
+  if (PartVT == ValueVT)
+    return Val;
+
+  if (PartVT.isVector()) {
+    // If the element type of the source/dest vectors are the same, but the
+    // parts vector has more elements than the value vector, then we have a
+    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
+    // elements we want.
+    if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+      assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+             "Cannot narrow, it would be a lossy transformation");
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+                         DAG.getIntPtrConstant(0));
+    }
+
+    // Vector/Vector bitcast.
+    if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+    assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+      "Cannot handle this kind of promotion");
+    // Promoted vector extract
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT, Val);
+
+  }
+
+  // Trivial bitcast if the types are the same size and the destination
+  // vector type is legal.
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+      TLI.isTypeLegal(ValueVT))
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  // Handle cases such as i8 -> <1 x i1>
+  assert(ValueVT.getVectorNumElements() == 1 &&
+         "Only trivial scalar-to-vector conversions should get here!");
+
+  if (ValueVT.getVectorNumElements() == 1 &&
+      ValueVT.getVectorElementType() != PartVT) {
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT.getScalarType(), Val);
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+
+
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue Val, SDValue *Parts, unsigned NumParts,
+                                 EVT PartVT);
+
  /// getCopyToParts - Create a series of nodes that contain the specified value
  /// split into legal parts.  If the parts contain more bits than Val, then, for
  /// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
                             SDValue Val, SDValue *Parts, unsigned NumParts,
                             EVT PartVT,
                             ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT PtrVT = TLI.getPointerTy();
    EVT ValueVT = Val.getValueType();
+
+  // Handle the vector case separately.
+  if (ValueVT.isVector())
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    unsigned PartBits = PartVT.getSizeInBits();
    unsigned OrigNumParts = NumParts;
    assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
  
-  if (!NumParts)
+  if (NumParts == 0)
      return;
  
-  if (!ValueVT.isVector()) {
-    if (PartVT == ValueVT) {
-      assert(NumParts == 1 && "No-op copy with multiple parts!");
-      Parts[0] = Val;
-      return;
-    }
-
-    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
-      // If the parts cover more bits than the value has, promote the value.
-      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
-        assert(NumParts == 1 && "Do not know what to promote to!");
-        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
-      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
-        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
-      } else {
-        llvm_unreachable("Unknown mismatch!");
-      }
-    } else if (PartBits == ValueVT.getSizeInBits()) {
-      // Different types of the same size.
-      assert(NumParts == 1 && PartVT != ValueVT);
-      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
-    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
-      // If the parts cover less bits than value has, truncate the value.
-      if (PartVT.isInteger() && ValueVT.isInteger()) {
-        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-      } else {
-        llvm_unreachable("Unknown mismatch!");
-      }
-    }
-
-    // The value may have changed - recompute ValueVT.
-    ValueVT = Val.getValueType();
-    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
-           "Failed to tile the value with PartVT!");
+  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+  if (PartVT == ValueVT) {
+    assert(NumParts == 1 && "No-op copy with multiple parts!");
+    Parts[0] = Val;
+    return;
+  }
  
-    if (NumParts == 1) {
-      assert(PartVT == ValueVT && "Type conversion failed!");
-      Parts[0] = Val;
-      return;
+  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+    // If the parts cover more bits than the value has, promote the value.
+    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+      assert(NumParts == 1 && "Do not know what to promote to!");
+      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+    } else {
+      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+             ValueVT.isInteger() &&
+             "Unknown mismatch!");
+      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+      if (PartVT == MVT::x86mmx)
+        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      }
+  } else if (PartBits == ValueVT.getSizeInBits()) {
+    // Different types of the same size.
+    assert(NumParts == 1 && PartVT != ValueVT);
+    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover less bits than value has, truncate the value.
+    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+           ValueVT.isInteger() &&
+           "Unknown mismatch!");
+    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+    if (PartVT == MVT::x86mmx)
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+  }
+
+  // The value may have changed - recompute ValueVT.
+  ValueVT = Val.getValueType();
+  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+         "Failed to tile the value with PartVT!");
  
-    // Expand the value into multiple parts.
-    if (NumParts & (NumParts - 1)) {
-      // The number of parts is not a power of 2.  Split off and copy the tail.
-      assert(PartVT.isInteger() && ValueVT.isInteger() &&
-             "Do not know what to expand to!");
-      unsigned RoundParts = 1 << Log2_32(NumParts);
-      unsigned RoundBits = RoundParts * PartBits;
-      unsigned OddParts = NumParts - RoundParts;
-      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
-                                   DAG.getConstant(RoundBits,
-                                                   TLI.getPointerTy()));
-      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
-                     OddParts, PartVT);
-
-      if (TLI.isBigEndian())
-        // The odd parts were reversed by getCopyToParts - unreverse them.
-        std::reverse(Parts + RoundParts, Parts + NumParts);
+  if (NumParts == 1) {
+    assert(PartVT == ValueVT && "Type conversion failed!");
+    Parts[0] = Val;
+    return;
+  }
  
-      NumParts = RoundParts;
-      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-    }
+  // Expand the value into multiple parts.
+  if (NumParts & (NumParts - 1)) {
+    // The number of parts is not a power of 2.  Split off and copy the tail.
+    assert(PartVT.isInteger() && ValueVT.isInteger() &&
+           "Do not know what to expand to!");
+    unsigned RoundParts = 1 << Log2_32(NumParts);
+    unsigned RoundBits = RoundParts * PartBits;
+    unsigned OddParts = NumParts - RoundParts;
+    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+                                 DAG.getIntPtrConstant(RoundBits));
+    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
  
-    // The number of parts is a power of 2.  Repeatedly bisect the value using
-    // EXTRACT_ELEMENT.
-    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           EVT::getIntegerVT(*DAG.getContext(),
-                                             ValueVT.getSizeInBits()),
-                           Val);
-
-    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
-      for (unsigned i = 0; i < NumParts; i += StepSize) {
-        unsigned ThisBits = StepSize * PartBits / 2;
-        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
-        SDValue &Part0 = Parts[i];
-        SDValue &Part1 = Parts[i+StepSize/2];
-
-        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
-                            ThisVT, Part0,
-                            DAG.getConstant(1, PtrVT));
-        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
-                            ThisVT, Part0,
-                            DAG.getConstant(0, PtrVT));
-
-        if (ThisBits == PartBits && ThisVT != PartVT) {
-          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                                PartVT, Part0);
-          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                                PartVT, Part1);
-        }
+    if (TLI.isBigEndian())
+      // The odd parts were reversed by getCopyToParts - unreverse them.
+      std::reverse(Parts + RoundParts, Parts + NumParts);
+
+    NumParts = RoundParts;
+    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  // The number of parts is a power of 2.  Repeatedly bisect the value using
+  // EXTRACT_ELEMENT.
+  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+                         EVT::getIntegerVT(*DAG.getContext(),
+                                           ValueVT.getSizeInBits()),
+                         Val);
+
+  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+    for (unsigned i = 0; i < NumParts; i += StepSize) {
+      unsigned ThisBits = StepSize * PartBits / 2;
+      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+      SDValue &Part0 = Parts[i];
+      SDValue &Part1 = Parts[i+StepSize/2];
+
+      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+                          ThisVT, Part0, DAG.getIntPtrConstant(1));
+      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+                          ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+      if (ThisBits == PartBits && ThisVT != PartVT) {
+        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
        }
      }
+  }
  
-    if (TLI.isBigEndian())
-      std::reverse(Parts, Parts + OrigNumParts);
+  if (TLI.isBigEndian())
+    std::reverse(Parts, Parts + OrigNumParts);
+}
  
-    return;
-  }
  
-  // Vector ValueVT.
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                 SDValue Val, SDValue *Parts, unsigned NumParts,
+                                 EVT PartVT) {
+  EVT ValueVT = Val.getValueType();
+  assert(ValueVT.isVector() && "Not a vector");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
    if (NumParts == 1) {
-    if (PartVT != ValueVT) {
-      if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
-        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
-      } else {
-        assert(ValueVT.getVectorElementType() == PartVT &&
-               ValueVT.getVectorNumElements() == 1 &&
-               "Only trivial vector-to-scalar conversions should get here!");
-        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                          PartVT, Val,
-                          DAG.getConstant(0, PtrVT));
-      }
+    if (PartVT == ValueVT) {
+      // Nothing to do.
+    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+      // Bitconvert vector->vector case.
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    } else if (PartVT.isVector() &&
+               PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+               PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+      EVT ElementVT = PartVT.getVectorElementType();
+      // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
+      // undef elements.
+      SmallVector<SDValue, 16> Ops;
+      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                  ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+      for (unsigned i = ValueVT.getVectorNumElements(),
+           e = PartVT.getVectorNumElements(); i != e; ++i)
+        Ops.push_back(DAG.getUNDEF(ElementVT));
+
+      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+      // FIXME: Use CONCAT for 2x -> 4x.
+
+      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+    } else if (PartVT.isVector() &&
+               PartVT.getVectorElementType().bitsGE(
+                 ValueVT.getVectorElementType()) &&
+               PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+      // Promoted vector extract
+      bool Smaller = PartVT.bitsLE(ValueVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                        DL, PartVT, Val);
+    } else{
+      // Vector -> scalar conversion.
+      assert(ValueVT.getVectorNumElements() == 1 &&
+             "Only trivial vector-to-scalar conversions should get here!");
+      Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                        PartVT, Val, DAG.getIntPtrConstant(0));
+
+      bool Smaller = ValueVT.bitsLE(PartVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                         DL, PartVT, Val);
      }
  
      Parts[0] = Val;
@@ -383,7 +503,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
    EVT IntermediateVT, RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                              IntermediateVT, NumIntermediates, RegisterVT);
+                                                IntermediateVT,
+                                                NumIntermediates, RegisterVT);
    unsigned NumElements = ValueVT.getVectorNumElements();
  
    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
@@ -394,14 +515,12 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    for (unsigned i = 0; i != NumIntermediates; ++i) {
      if (IntermediateVT.isVector())
-      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                             IntermediateVT, Val,
-                           DAG.getConstant(i * (NumElements / NumIntermediates),
-                                           PtrVT));
+                   DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
      else
-      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                           IntermediateVT, Val,
-                           DAG.getConstant(i, PtrVT));
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                           IntermediateVT, Val, DAG.getIntPtrConstant(i));
    }
  
    // Split the intermediate operands into legal parts.
@@ -409,7 +528,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
      // If the register was not expanded, promote or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
-      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, split each the value into
      // legal parts.
@@ -417,10 +536,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
-      getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
    }
  }
  
+
+
+
  namespace {
    /// RegsForValue - This struct represents the registers (physical or virtual)
    /// that a particular set of values is assigned, and the type information
@@ -460,13 +582,8 @@ namespace {
                   EVT regvt, EVT valuevt)
        : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
  
-    RegsForValue(const SmallVector<unsigned, 4> &regs,
-                 const SmallVector<EVT, 4> &regvts,
-                 const SmallVector<EVT, 4> &valuevts)
-      : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-
      RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) {
+                 unsigned Reg, Type *Ty) {
        ComputeValueVTs(tli, Ty, ValueVTs);
  
        for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -530,6 +647,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                        FunctionLoweringInfo &FuncInfo,
                                        DebugLoc dl,
                                        SDValue &Chain, SDValue *Flag) const {
+  // A Value with type {} or [0 x %t] needs no registers.
+  if (ValueVTs.empty())
+    return SDValue();
+
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  
    // Assemble the legal parts into the final values.
@@ -552,48 +673,50 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
        }
  
        Chain = P.getValue(1);
+      Parts[i] = P;
  
        // If the source register was virtual and if we know something about it,
        // add an assert node.
-      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
-          RegisterVT.isInteger() && !RegisterVT.isVector()) {
-        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
-        if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
-          const FunctionLoweringInfo::LiveOutInfo &LOI =
-            FuncInfo.LiveOutRegInfo[SlotNo];
-
-          unsigned RegSize = RegisterVT.getSizeInBits();
-          unsigned NumSignBits = LOI.NumSignBits;
-          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
-          // FIXME: We capture more information than the dag can represent.  For
-          // now, just use the tightest assertzext/assertsext possible.
-          bool isSExt = true;
-          EVT FromVT(MVT::Other);
-          if (NumSignBits == RegSize)
-            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
-          else if (NumZeroBits >= RegSize-1)
-            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
-          else if (NumSignBits > RegSize-8)
-            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
-          else if (NumZeroBits >= RegSize-8)
-            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
-          else if (NumSignBits > RegSize-16)
-            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
-          else if (NumZeroBits >= RegSize-16)
-            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
-          else if (NumSignBits > RegSize-32)
-            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
-          else if (NumZeroBits >= RegSize-32)
-            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
-          if (FromVT != MVT::Other)
-            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
-                            RegisterVT, P, DAG.getValueType(FromVT));
-        }
-      }
+      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+          !RegisterVT.isInteger() || RegisterVT.isVector())
+        continue;
  
-      Parts[i] = P;
+      const FunctionLoweringInfo::LiveOutInfo *LOI =
+        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+      if (!LOI)
+        continue;
+
+      unsigned RegSize = RegisterVT.getSizeInBits();
+      unsigned NumSignBits = LOI->NumSignBits;
+      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+      // FIXME: We capture more information than the dag can represent.  For
+      // now, just use the tightest assertzext/assertsext possible.
+      bool isSExt = true;
+      EVT FromVT(MVT::Other);
+      if (NumSignBits == RegSize)
+        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
+      else if (NumZeroBits >= RegSize-1)
+        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
+      else if (NumSignBits > RegSize-8)
+        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
+      else if (NumZeroBits >= RegSize-8)
+        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
+      else if (NumSignBits > RegSize-16)
+        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
+      else if (NumZeroBits >= RegSize-16)
+        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+      else if (NumSignBits > RegSize-32)
+        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
+      else if (NumZeroBits >= RegSize-32)
+        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+      else
+        continue;
+
+      // Add an assertion node.
+      assert(FromVT != MVT::Other);
+      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                             RegisterVT, P, DAG.getValueType(FromVT));
      }
  
      Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
@@ -623,8 +746,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
      unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
      EVT RegisterVT = RegVTs[Value];
  
-    getCopyToParts(DAG, dl,
-                   Val.getValue(Val.getResNo() + Value),
+    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                     &Parts[Part], NumParts, RegisterVT);
      Part += NumParts;
    }
@@ -671,6 +793,18 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
    unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
    if (HasMatching)
      Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+  else if (!Regs.empty() &&
+           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+    // Put the register class of the virtual registers in the flag word.  That
+    // way, later passes can recompute register class constraints for inline
+    // assembly as well as normal instructions.
+    // Don't do this for tied operands that can use the regclass information
+    // from the def.
+    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+  }
+
    SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
    Ops.push_back(Res);
  
@@ -684,10 +818,13 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
    }
  }
  
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+                               const TargetLibraryInfo *li) {
    AA = &aa;
    GFI = gfi;
+  LibInfo = li;
    TD = DAG.getTarget().getTargetData();
+  LPadToCallSiteMap.clear();
  }
  
  /// clear - Clear out the current SelectionDAG and the associated
@@ -705,6 +842,16 @@ void SelectionDAGBuilder::clear() {
    HasTailCall = false;
  }
  
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is seperated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+  DanglingDebugInfoMap.clear();
+}
+
  /// getRoot - Return the current virtual root of the Selection DAG,
  /// flushing any PendingLoad items. This must be done before emitting
  /// a store or any other node that may need to be ordered after any
@@ -805,7 +952,31 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
    }
  }
  
-// getValue - Return an SDValue for the given Value.
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+                                                   SDValue Val) {
+  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+  if (DDI.getDI()) {
+    const DbgValueInst *DI = DDI.getDI();
+    DebugLoc dl = DDI.getdl();
+    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+    MDNode *Variable = DI->getVariable();
+    uint64_t Offset = DI->getOffset();
+    SDDbgValue *SDV;
+    if (Val.getNode()) {
+      if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+        SDV = DAG.getDbgValue(Variable, Val.getNode(),
+                              Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+        DAG.AddDbgValue(SDV, Val.getNode(), false);
+      }
+    } else
+      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+    DanglingDebugInfoMap[V] = DanglingDebugInfo();
+  }
+}
+
+/// getValue - Return an SDValue for the given Value.
  SDValue SelectionDAGBuilder::getValue(const Value *V) {
    // If we already have an SDValue for this value, use it. It's important
    // to do this first, so that we don't create a CopyFromReg if we already
@@ -820,12 +991,15 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
      unsigned InReg = It->second;
      RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
      SDValue Chain = DAG.getEntryNode();
-    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    resolveDanglingDebugInfo(V, N);
+    return N;
    }
  
    // Otherwise create a new SDValue and remember it.
    SDValue Val = getValueImpl(V);
    NodeMap[V] = Val;
+  resolveDanglingDebugInfo(V, Val);
    return Val;
  }
  
@@ -839,10 +1013,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
    // Otherwise create a new SDValue and remember it.
    SDValue Val = getValueImpl(V);
    NodeMap[V] = Val;
+  resolveDanglingDebugInfo(V, Val);
    return Val;
  }
  
-/// getValueImpl - Helper function for getValue and getMaterializedValue.
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
  /// Create an SDValue for the given value.
  SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
    if (const Constant *C = dyn_cast<Constant>(V)) {
@@ -852,7 +1027,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
        return DAG.getConstant(*CI, VT);
  
      if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-      return DAG.getGlobalAddress(GV, VT);
+      return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
  
      if (isa<ConstantPointerNull>(C))
        return DAG.getConstant(0, TLI.getPointerTy());
@@ -886,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
        return DAG.getMergeValues(&Constants[0], Constants.size(),
                                  getCurDebugLoc());
      }
+    
+    if (const ConstantDataSequential *CDS =
+          dyn_cast<ConstantDataSequential>(C)) {
+      SmallVector<SDValue, 4> Ops;
+      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+        // Add each leaf value from the operand to the Constants list
+        // to form a flattened list of all the values.
+        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+          Ops.push_back(SDValue(Val, i));
+      }
+
+      if (isa<ArrayType>(CDS->getType()))
+        return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+      return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                                      VT, &Ops[0], Ops.size());
+    }
  
      if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
        assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -914,15 +1106,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
      if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
        return DAG.getBlockAddress(BA, VT);
  
-    const VectorType *VecTy = cast<VectorType>(V->getType());
+    VectorType *VecTy = cast<VectorType>(V->getType());
      unsigned NumElements = VecTy->getNumElements();
  
      // Now that we know the number and type of the elements, get that number of
      // elements into the Ops array based on what kind of constant it is.
      SmallVector<SDValue, 16> Ops;
-    if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
        for (unsigned i = 0; i != NumElements; ++i)
-        Ops.push_back(getValue(CP->getOperand(i)));
+        Ops.push_back(getValue(CV->getOperand(i)));
      } else {
        assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
        EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -951,82 +1143,19 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  
    // If this is an instruction which fast-isel has deferred, select it now.
    if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
-    assert(Inst->isSafeToSpeculativelyExecute() &&
-           "Instruction with side effects deferred!");
-    visit(*Inst);
-    DenseMap<const Value *, SDValue>::iterator NIt = NodeMap.find(Inst);
-    if (NIt != NodeMap.end() && NIt->second.getNode())
-      return NIt->second;
+    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+    RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+    SDValue Chain = DAG.getEntryNode();
+    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
    }
  
    llvm_unreachable("Can't get register for value!");
-  return SDValue();
-}
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return 
-/// type of the given function.  This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-static void getReturnInfo(const Type* ReturnType,
-                   Attributes attr, SmallVectorImpl<EVT> &OutVTs,
-                   SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
-                   const TargetLowering &TLI,
-                   SmallVectorImpl<uint64_t> *Offsets = 0) {
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, ReturnType, ValueVTs);
-  unsigned NumValues = ValueVTs.size();
-  if (NumValues == 0) return;
-  unsigned Offset = 0;
-
-  for (unsigned j = 0, f = NumValues; j != f; ++j) {
-    EVT VT = ValueVTs[j];
-    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
-    if (attr & Attribute::SExt)
-      ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
-      ExtendKind = ISD::ZERO_EXTEND;
-
-    // FIXME: C calling convention requires the return type to be promoted to
-    // at least 32-bit. But this is not necessary for non-C calling
-    // conventions. The frontend should mark functions whose return values
-    // require promoting with signext or zeroext attributes.
-    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
-      if (VT.bitsLT(MinVT))
-        VT = MinVT;
-    }
-
-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
-    unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
-                        PartVT.getTypeForEVT(ReturnType->getContext()));
-
-    // 'inreg' on function refers to return value
-    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-    if (attr & Attribute::InReg)
-      Flags.setInReg();
-
-    // Propagate extension type if any
-    if (attr & Attribute::SExt)
-      Flags.setSExt();
-    else if (attr & Attribute::ZExt)
-      Flags.setZExt();
-
-    for (unsigned i = 0; i < NumParts; ++i) {
-      OutVTs.push_back(PartVT);
-      OutFlags.push_back(Flags);
-      if (Offsets)
-      {
-        Offsets->push_back(Offset);
-        Offset += PartSize;
-      }
-    }
-  }
  }
  
  void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
    SDValue Chain = getControlRoot();
    SmallVector<ISD::OutputArg, 8> Outs;
+  SmallVector<SDValue, 8> OutVals;
  
    if (!FuncInfo.CanLowerReturn) {
      unsigned DemoteReg = FuncInfo.DemoteRegister;
@@ -1048,14 +1177,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
      unsigned NumValues = ValueVTs.size();
  
      SmallVector<SDValue, 4> Chains(NumValues);
-    EVT PtrVT = PtrValueVTs[0];
      for (unsigned i = 0; i != NumValues; ++i) {
-      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
-                                DAG.getConstant(Offsets[i], PtrVT));
+      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                                RetPtr.getValueType(), RetPtr,
+                                DAG.getIntPtrConstant(Offsets[i]));
        Chains[i] =
          DAG.getStore(Chain, getCurDebugLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                     Add, NULL, Offsets[i], false, false, 0);
+                     // FIXME: better loc info would be nice.
+                     Add, MachinePointerInfo(), false, false, 0);
      }
  
      Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1077,15 +1207,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
          else if (F->paramHasAttr(0, Attribute::ZExt))
            ExtendKind = ISD::ZERO_EXTEND;
  
-        // FIXME: C calling convention requires the return type to be promoted
-        // to at least 32-bit. But this is not necessary for non-C calling
-        // conventions. The frontend should mark functions whose return values
-        // require promoting with signext or zeroext attributes.
-        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
-          if (VT.bitsLT(MinVT))
-            VT = MinVT;
-        }
+        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+          VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
  
          unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
          EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
@@ -1100,13 +1223,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
            Flags.setInReg();
  
          // Propagate extension type if any
-        if (F->paramHasAttr(0, Attribute::SExt))
+        if (ExtendKind == ISD::SIGN_EXTEND)
            Flags.setSExt();
-        else if (F->paramHasAttr(0, Attribute::ZExt))
+        else if (ExtendKind == ISD::ZERO_EXTEND)
            Flags.setZExt();
  
-        for (unsigned i = 0; i < NumParts; ++i)
-          Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
+        for (unsigned i = 0; i < NumParts; ++i) {
+          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+                                        /*isfixed=*/true));
+          OutVals.push_back(Parts[i]);
+        }
        }
      }
    }
@@ -1115,7 +1241,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
    CallingConv::ID CallConv =
      DAG.getMachineFunction().getFunction()->getCallingConv();
    Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
-                          Outs, getCurDebugLoc(), DAG);
+                          Outs, OutVals, getCurDebugLoc(), DAG);
  
    // Verify that the target's LowerReturn behaved as expected.
    assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -1129,6 +1255,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  /// created for it, emit nodes to copy the value into the virtual
  /// registers.
  void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+  // Skip empty types
+  if (V->getType()->isEmptyTy())
+    return;
+
    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      assert(!V->use_empty() && "Unused value assigned virtual registers!");
@@ -1177,6 +1307,26 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
    return true;
  }
  
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+                                            const MachineBasicBlock *Dst) const {
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  if (!BPI)
+    return 0;
+  const BasicBlock *SrcBB = Src->getBasicBlock();
+  const BasicBlock *DstBB = Dst->getBasicBlock();
+  return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+                       uint32_t Weight /* = 0 */) {
+  if (!Weight)
+    Weight = getEdgeWeight(Src, Dst);
+  Src->addSuccessor(Dst, Weight);
+}
+
+
  static bool InBlock(const Value *V, const BasicBlock *BB) {
    if (const Instruction *I = dyn_cast<Instruction>(V))
      return I->getParent() == BB;
@@ -1209,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
          Condition = getICmpCondCode(IC->getPredicate());
        } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
          Condition = getFCmpCondCode(FC->getPredicate());
+        if (TM.Options.NoNaNsFPMath)
+          Condition = getFCmpCodeWithoutNaN(Condition);
        } else {
          Condition = ISD::SETEQ; // silence warning.
          llvm_unreachable("Unknown compare instruction");
@@ -1311,12 +1463,12 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
      if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
        return false;
    }
-  
+
    return true;
  }
  
  void SelectionDAGBuilder::visitBr(const BranchInst &I) {
-  MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+  MachineBasicBlock *BrMBB = FuncInfo.MBB;
  
    // Update machine-CFG edges.
    MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1347,6 +1499,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
  
    // If this is a series of conditions that are or'd or and'd together, emit
    // this as a sequence of branches instead of setcc's with and/or operations.
+  // As long as jumps are not expensive, this should improve performance.
    // For example, instead of something like:
    //     cmp A, B
    //     C = seteq
@@ -1361,7 +1514,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
    //     jle foo
    //
    if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (BOp->hasOneUse() &&
+    if (!TLI.isJumpExpensive() &&
+        BOp->hasOneUse() &&
          (BOp->getOpcode() == Instruction::And ||
           BOp->getOpcode() == Instruction::Or)) {
        FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1444,8 +1598,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
    }
  
    // Update successor info
-  SwitchBB->addSuccessor(CB.TrueBB);
-  SwitchBB->addSuccessor(CB.FalseBB);
+  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+  addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
  
    // Set NextBlock to be the MBB immediately after the current one, if any.
    // This is used to avoid emitting unnecessary branches to the next block.
@@ -1466,10 +1620,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                 MVT::Other, getControlRoot(), Cond,
                                 DAG.getBasicBlock(CB.TrueBB));
  
-  // Insert the false branch.
-  if (CB.FalseBB != NextBlock)
-    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
-                         DAG.getBasicBlock(CB.FalseBB));
+  // Insert the false branch. Do this even if it's a fall through branch,
+  // this makes it easier to do DAG optimizations which require inverting
+  // the branch condition.
+  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                       DAG.getBasicBlock(CB.FalseBB));
  
    DAG.setRoot(BrCond);
  }
@@ -1556,12 +1711,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                    Sub, DAG.getConstant(B.Range, VT),
                                    ISD::SETUGT);
  
-  SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
-                                       TLI.getPointerTy());
+  // Determine the type of the test operands.
+  bool UsePtrType = false;
+  if (!TLI.isTypeLegal(VT))
+    UsePtrType = true;
+  else {
+    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+        // Switch table case range are encoded into series of masks.
+        // Just use pointer type, it's guaranteed to fit.
+        UsePtrType = true;
+        break;
+      }
+  }
+  if (UsePtrType) {
+    VT = TLI.getPointerTy();
+    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+  }
  
-  B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+  B.RegVT = VT;
+  B.Reg = FuncInfo.CreateReg(VT);
    SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
-                                    B.Reg, ShiftOp);
+                                    B.Reg, Sub);
  
    // Set NextBlock to be the MBB immediately after the current one, if any.
    // This is used to avoid emitting unnecessary branches to the next block.
@@ -1572,8 +1743,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  
    MachineBasicBlock* MBB = B.Cases[0].ThisBB;
  
-  SwitchBB->addSuccessor(B.Default);
-  SwitchBB->addSuccessor(MBB);
+  addSuccessorWithWeight(SwitchBB, B.Default);
+  addSuccessorWithWeight(SwitchBB, MBB);
  
    SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                  MVT::Other, CopyTo, RangeCmp,
@@ -1587,41 +1758,47 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  }
  
  /// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+                                           MachineBasicBlock* NextMBB,
                                             unsigned Reg,
                                             BitTestCase &B,
                                             MachineBasicBlock *SwitchBB) {
-  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
-                                       TLI.getPointerTy());
+  EVT VT = BB.RegVT;
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                       Reg, VT);
    SDValue Cmp;
-  if (CountPopulation_64(B.Mask) == 1) {
+  unsigned PopCount = CountPopulation_64(B.Mask);
+  if (PopCount == 1) {
      // Testing for a single bit; just compare the shift count with what it
      // would need to be to shift a 1 bit in that position.
      Cmp = DAG.getSetCC(getCurDebugLoc(),
-                       TLI.getSetCCResultType(ShiftOp.getValueType()),
+                       TLI.getSetCCResultType(VT),
                         ShiftOp,
-                       DAG.getConstant(CountTrailingZeros_64(B.Mask),
-                                       TLI.getPointerTy()),
+                       DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
                         ISD::SETEQ);
+  } else if (PopCount == BB.Range) {
+    // There is only one zero bit in the range, test for it directly.
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(VT),
+                       ShiftOp,
+                       DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+                       ISD::SETNE);
    } else {
      // Make desired shift
-    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
-                                    TLI.getPointerTy(),
-                                    DAG.getConstant(1, TLI.getPointerTy()),
-                                    ShiftOp);
+    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+                                    DAG.getConstant(1, VT), ShiftOp);
  
      // Emit bit tests and jumps
      SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
-                                TLI.getPointerTy(), SwitchVal,
-                                DAG.getConstant(B.Mask, TLI.getPointerTy()));
+                                VT, SwitchVal, DAG.getConstant(B.Mask, VT));
      Cmp = DAG.getSetCC(getCurDebugLoc(),
-                       TLI.getSetCCResultType(AndOp.getValueType()),
-                       AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                       TLI.getSetCCResultType(VT),
+                       AndOp, DAG.getConstant(0, VT),
                         ISD::SETNE);
    }
  
-  SwitchBB->addSuccessor(B.TargetBB);
-  SwitchBB->addSuccessor(NextMBB);
+  addSuccessorWithWeight(SwitchBB, B.TargetBB);
+  addSuccessorWithWeight(SwitchBB, NextMBB);
  
    SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                MVT::Other, getControlRoot(),
@@ -1642,7 +1819,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
  }
  
  void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
-  MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
  
    // Retrieve successors.
    MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1659,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
    CopyToExportRegsIfNeeded(&I);
  
    // Update successor info
-  InvokeMBB->addSuccessor(Return);
-  InvokeMBB->addSuccessor(LandingPad);
+  addSuccessorWithWeight(InvokeMBB, Return);
+  addSuccessorWithWeight(InvokeMBB, LandingPad);
  
    // Drop into normal successor.
    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1668,7 +1845,53 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
                            DAG.getBasicBlock(Return)));
  }
  
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to landingpad not in landing pad!");
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  AddLandingPadInfo(LP, MMI, MBB);
+
+  // If there aren't registers to copy the values into (e.g., during SjLj
+  // exceptions), then don't bother to create these DAG nodes.
+  if (TLI.getExceptionPointerRegister() == 0 &&
+      TLI.getExceptionSelectorRegister() == 0)
+    return;
+
+  SmallVector<EVT, 2> ValueVTs;
+  ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+  // Insert the EXCEPTIONADDR instruction.
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to eh.exception not in landing pad!");
+  SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  SDValue Ops[2];
+  Ops[0] = DAG.getRoot();
+  SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+  SDValue Chain = Op1.getValue(1);
+
+  // Insert the EHSELECTION instruction.
+  VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  Ops[0] = Op1;
+  Ops[1] = Chain;
+  SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+  Chain = Op2.getValue(1);
+  Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+  Ops[0] = Op1;
+  Ops[1] = Op2;
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                            DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                            &Ops[0], 2);
+
+  std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+  setValue(&LP, RetPair.first);
+  DAG.setRoot(RetPair.second);
  }
  
  /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
@@ -1696,10 +1919,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
    if (++BBI != FuncInfo.MF->end())
      NextBlock = BBI;
  
-  // TODO: If any two of the cases has the same destination, and if one value
+  // If any two of the cases has the same destination, and if one value
    // is the same as the other, but has one bit unset that the other has set,
    // use bit manipulation to do two compares at once.  For example:
    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+  // TODO: Handle cases where CR.CaseBB != SwitchBB.
+  if (Size == 2 && CR.CaseBB == SwitchBB) {
+    Case &Small = *CR.Range.first;
+    Case &Big = *(CR.Range.second-1);
+
+    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+      // Check that there is only one bit different.
+      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+          (SmallValue | BigValue) == BigValue) {
+        // Isolate the common bit.
+        APInt CommonBit = BigValue & ~SmallValue;
+        assert((SmallValue | CommonBit) == BigValue &&
+               CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+        SDValue CondLHS = getValue(SV);
+        EVT VT = CondLHS.getValueType();
+        DebugLoc DL = getCurDebugLoc();
+
+        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                 DAG.getConstant(CommonBit, VT));
+        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+                                    Or, DAG.getConstant(BigValue, VT),
+                                    ISD::SETEQ);
+
+        // Update successor info.
+        addSuccessorWithWeight(SwitchBB, Small.BB);
+        addSuccessorWithWeight(SwitchBB, Default);
+
+        // Insert the true branch.
+        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+                                     getControlRoot(), Cond,
+                                     DAG.getBasicBlock(Small.BB));
+
+        // Insert the false branch.
+        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+                             DAG.getBasicBlock(Default));
+
+        DAG.setRoot(BrCond);
+        return true;
+      }
+    }
+  }
  
    // Rearrange the case blocks so that the last one falls through if possible.
    if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1740,7 +2009,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
        CC = ISD::SETLE;
        LHS = I->Low; MHS = SV; RHS = I->High;
      }
-    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    uint32_t ExtraWeight = I->ExtraWeight;
+    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+                 /* me */ CurBlock,
+                 /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
  
      // If emitting the first comparison, just call visitSwitchCase to emit the
      // code into the current block.  Otherwise, push the CaseBlock onto the
@@ -1758,23 +2031,22 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
  }
  
  static inline bool areJTsAllowed(const TargetLowering &TLI) {
-  return !DisableJumpTables &&
+  return !TLI.getTargetMachine().Options.DisableJumpTables &&
            (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
             TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
  }
  
  static APInt ComputeRange(const APInt &First, const APInt &Last) {
-  APInt LastExt(Last), FirstExt(First);
    uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
-  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
    return (LastExt - FirstExt + 1ULL);
  }
  
  /// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
-                                             CaseRecVector& WorkList,
-                                             const Value* SV,
-                                             MachineBasicBlock* Default,
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+                                             CaseRecVector &WorkList,
+                                             const Value *SV,
+                                             MachineBasicBlock *Default,
                                               MachineBasicBlock *SwitchBB) {
    Case& FrontCase = *CR.Range.first;
    Case& BackCase  = *(CR.Range.second-1);
@@ -1783,22 +2055,24 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
  
    APInt TSize(First.getBitWidth(), 0);
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I)
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
      TSize += I->size();
  
    if (!areJTsAllowed(TLI) || TSize.ult(4))
      return false;
  
    APInt Range = ComputeRange(First, Last);
-  double Density = TSize.roundToDouble() / Range.roundToDouble();
-  if (Density < 0.4)
+  // The density is TSize / Range. Require at least 40%.
+  // It should not be possible for IntTSize to saturate for sane code, but make
+  // sure we handle Range saturation correctly.
+  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
+  if (IntTSize * 10 < IntRange * 4)
      return false;
  
    DEBUG(dbgs() << "Lowering jump table\n"
                 << "First entry: " << First << ". Last entry: " << Last << '\n'
-               << "Range: " << Range
-               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+               << "Range: " << Range << ". Size: " << TSize << ".\n\n");
  
    // Get the MachineFunction which holds the current MBB.  This is used when
    // inserting any additional MBBs necessary to represent the switch.
@@ -1816,8 +2090,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    // table.
    MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, JumpTableBB);
-  CR.CaseBB->addSuccessor(Default);
-  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  addSuccessorWithWeight(CR.CaseBB, Default);
+  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
  
    // Build a vector of destination BBs, corresponding to each target
    // of the jump table. If the value of the jump table slot corresponds to
@@ -1844,7 +2119,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
           E = DestBBs.end(); I != E; ++I) {
      if (!SuccsHandled[(*I)->getNumber()]) {
        SuccsHandled[(*I)->getNumber()] = true;
-      JumpTableBB->addSuccessor(*I);
+      addSuccessorWithWeight(JumpTableBB, *I);
      }
    }
  
@@ -1861,7 +2136,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
      visitJumpTableHeader(JT, JTH, SwitchBB);
  
    JTCases.push_back(JumpTableBlock(JTH, JT));
-
    return true;
  }
  
@@ -1911,9 +2185,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
      APInt Range = ComputeRange(LEnd, RBegin);
      assert((Range - 2ULL).isNonNegative() &&
             "Invalid case distance");
-    double LDensity = (double)LSize.roundToDouble() /
+    // Use volatile double here to avoid excess precision issues on some hosts,
+    // e.g. that use 80-bit X87 registers.
+    volatile double LDensity =
+       (double)LSize.roundToDouble() /
                             (LEnd - First + 1ULL).roundToDouble();
-    double RDensity = (double)RSize.roundToDouble() /
+    volatile double RDensity =
+      (double)RSize.roundToDouble() /
                             (Last - RBegin + 1ULL).roundToDouble();
      double Metric = Range.logBase2()*(LDensity+RDensity);
      // Should always split in some non-trivial place
@@ -1940,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
  
    CaseRange LHSR(CR.Range.first, Pivot);
    CaseRange RHSR(Pivot, CR.Range.second);
-  Constant *C = Pivot->Low;
+  const Constant *C = Pivot->Low;
    MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
  
    // We know that we branch to the LHS if the Value being switched on is
@@ -2115,7 +2393,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
    }
  
    BitTestBlock BTB(lowBound, cmpRange, SV,
-                   -1U, (CR.CaseBB == SwitchBB),
+                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
                     CR.CaseBB, Default, BTC);
  
    if (CR.CaseBB == SwitchBB)
@@ -2131,12 +2409,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                         const SwitchInst& SI) {
    size_t numCmps = 0;
  
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // Start with "simple" cases
-  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
-    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
-    Cases.push_back(Case(SI.getSuccessorValue(i),
-                         SI.getSuccessorValue(i),
-                         SMBB));
+  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+       i != e; ++i) {
+    const BasicBlock *SuccBB = i.getCaseSuccessor();
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+    uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
+
+    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
+                         SMBB, ExtraWeight));
    }
    std::sort(Cases.begin(), Cases.end(), CaseCmp());
  
@@ -2144,7 +2427,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
    if (Cases.size() >= 2)
      // Must recompute end() each iteration because it may be
      // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
        const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
        const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
        MachineBasicBlock* nextBB = J->BB;
@@ -2155,6 +2439,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
        if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
          I->High = J->High;
          J = Cases.erase(J);
+
+        if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
+          uint32_t CurWeight = currentBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
+          uint32_t NextWeight = nextBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
+
+          BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
+                             CurWeight + NextWeight);
+        }
        } else {
          I = J++;
        }
@@ -2169,8 +2463,21 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
    return numCmps;
  }
  
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+                                           MachineBasicBlock *Last) {
+  // Update JTCases.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+    if (JTCases[i].first.HeaderBB == First)
+      JTCases[i].first.HeaderBB = Last;
+
+  // Update BitTestCases.
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+    if (BitTestCases[i].Parent == First)
+      BitTestCases[i].Parent = Last;
+}
+
  void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
-  MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  
    // Figure out which block is immediately after the current one.
    MachineBasicBlock *NextBlock = 0;
@@ -2178,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  
    // If there is only the default destination, branch to it if it is not the
    // next basic block.  Otherwise, just fall through.
-  if (SI.getNumOperands() == 2) {
+  if (!SI.getNumCases()) {
      // Update machine-CFG edges.
  
      // If this is not a fall-through branch, emit the branch.
@@ -2198,12 +2505,12 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    size_t numCmps = Clusterify(Cases, SI);
    DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
                 << ". Total compares: " << numCmps << '\n');
-  numCmps = 0;
+  (void)numCmps;
  
    // Get the Value to be switched on and default basic blocks, which will be
    // inserted into CaseBlock records, representing basic blocks in the binary
    // search tree.
-  const Value *SV = SI.getOperand(0);
+  const Value *SV = SI.getCondition();
  
    // Push the initial CaseRec onto the worklist
    CaseRecVector WorkList;
@@ -2236,7 +2543,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  }
  
  void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
-  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
  
    // Update machine-CFG edges with unique successors.
    SmallVector<BasicBlock*, 32> succs;
@@ -2245,8 +2552,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
      succs.push_back(I.getSuccessor(i));
    array_pod_sort(succs.begin(), succs.end());
    succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i)
-    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+    addSuccessorWithWeight(IndirectBrMBB, Succ);
+  }
  
    DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                            MVT::Other, getControlRoot(),
@@ -2255,31 +2564,15 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  
  void SelectionDAGBuilder::visitFSub(const User &I) {
    // -0.0 - X --> fneg
-  const Type *Ty = I.getType();
-  if (Ty->isVectorTy()) {
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
-      const VectorType *DestTy = cast<VectorType>(I.getType());
-      const Type *ElTy = DestTy->getElementType();
-      unsigned VL = DestTy->getNumElements();
-      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
-      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
-      if (CV == CNZ) {
-        SDValue Op2 = getValue(I.getOperand(1));
-        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                 Op2.getValueType(), Op2));
-        return;
-      }
-    }
+  Type *Ty = I.getType();
+  if (isa<Constant>(I.getOperand(0)) &&
+      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+    SDValue Op2 = getValue(I.getOperand(1));
+    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                             Op2.getValueType(), Op2));
+    return;
    }
  
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
-    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
-      SDValue Op2 = getValue(I.getOperand(1));
-      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                               Op2.getValueType(), Op2));
-      return;
-    }
-
    visitBinary(I, ISD::FSUB);
  }
  
@@ -2293,37 +2586,51 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
  void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
-  if (!I.getType()->isVectorTy() &&
-      Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+  MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+  // Coerce the shift amount to the right type if we can.
+  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+    unsigned ShiftSize = ShiftTy.getSizeInBits();
+    unsigned Op2Size = Op2.getValueType().getSizeInBits();
+    DebugLoc DL = getCurDebugLoc();
+
      // If the operand is smaller than the shift count type, promote it.
-    EVT PTy = TLI.getPointerTy();
-    EVT STy = TLI.getShiftAmountTy();
-    if (STy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
+    if (ShiftSize > Op2Size)
+      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
      // If the operand is larger than the shift count type but the shift
      // count type has enough bits to represent any shift value, truncate
      // it now. This is a common case and it exposes the truncate to
      // optimization early.
-    else if (STy.getSizeInBits() >=
-             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
-    // Otherwise we'll need to temporarily settle for some other
-    // convenient type; type legalization will make adjustments as
-    // needed.
-    else if (PTy.bitsLT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
-    else if (PTy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
+    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+    // Otherwise we'll need to temporarily settle for some other convenient
+    // type.  Type legalization will make adjustments once the shiftee is split.
+    else
+      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
    }
  
    setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
                             Op1.getValueType(), Op1, Op2));
  }
  
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  // Turn exact SDivs into multiplications.
+  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+  // exact bit.
+  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+      !isa<ConstantSDNode>(Op1) &&
+      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+  else
+    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+                             Op1, Op2));
+}
+
  void SelectionDAGBuilder::visitICmp(const User &I) {
    ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2347,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
    ISD::CondCode Condition = getFCmpCondCode(predicate);
+  if (TM.Options.NoNaNsFPMath)
+    Condition = getFCmpCodeWithoutNaN(Condition);
    EVT DestVT = TLI.getValueType(I.getType());
    setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
  }
@@ -2361,10 +2670,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
    SDValue Cond     = getValue(I.getOperand(0));
    SDValue TrueVal  = getValue(I.getOperand(1));
    SDValue FalseVal = getValue(I.getOperand(2));
+  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+    ISD::VSELECT : ISD::SELECT;
  
    for (unsigned i = 0; i != NumValues; ++i)
-    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+    Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+                            TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
                              Cond,
                              SDValue(TrueVal.getNode(),
                                      TrueVal.getResNo() + i),
@@ -2404,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
    SDValue N = getValue(I.getOperand(0));
    EVT DestVT = TLI.getValueType(I.getType());
    setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
-                           DestVT, N, DAG.getIntPtrConstant(0)));
+                           DestVT, N,
+                           DAG.getTargetConstant(0, TLI.getPointerTy())));
  }
  
  void SelectionDAGBuilder::visitFPExt(const User &I){
-  // FPTrunc is never a no-op cast, no need to check
+  // FPExt is never a no-op cast, no need to check
    SDValue N = getValue(I.getOperand(0));
    EVT DestVT = TLI.getValueType(I.getType());
    setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2463,9 +2775,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
    EVT DestVT = TLI.getValueType(I.getType());
  
    // BitCast assures us that source and destination are the same size so this is
-  // either a BIT_CONVERT or a no-op.
+  // either a BITCAST or a no-op.
    if (DestVT != N.getValueType())
-    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                               DestVT, N)); // convert types.
    else
      setValue(&I, N);            // noop cast.
@@ -2491,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
                             TLI.getValueType(I.getType()), InVec, InIdx));
  }
  
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
-  unsigned MaskNumElts = Mask.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i)
-    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// begining // from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Pos). or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+                                int Pos, int Size, int Low) {
+  for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (Mask[i] >= 0 && Mask[i] != Low)
        return false;
    return true;
  }
  
  void SelectionDAGBuilder::visitShuffleVector(const User &I) {
-  SmallVector<int, 8> Mask;
    SDValue Src1 = getValue(I.getOperand(0));
    SDValue Src2 = getValue(I.getOperand(1));
  
-  // Convert the ConstantVector mask operand into an array of ints, with -1
-  // representing undef values.
-  SmallVector<Constant*, 8> MaskElts;
-  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
-  unsigned MaskNumElts = MaskElts.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (isa<UndefValue>(MaskElts[i]))
-      Mask.push_back(-1);
-    else
-      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
-  }
-
+  SmallVector<int, 8> Mask;
+  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+  unsigned MaskNumElts = Mask.size();
+  
    EVT VT = TLI.getValueType(I.getType());
    EVT SrcVT = Src1.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2533,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Mask is longer than the source vectors and is a multiple of the source
      // vectors.  We can use concatenate vector to make the mask and vectors
      // lengths match.
-    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
-      // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                               VT, Src1, Src2));
-      return;
+    if (SrcNumElts*2 == MaskNumElts) {
+      // First check for Src1 in low and Src2 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src1, Src2));
+        return;
+      }
+      // Then check for Src2 in low and Src1 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src2, Src1));
+        return;
+      }
      }
  
      // Pad both vectors with undefs to make them the same length as the mask.
@@ -2577,7 +2893,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Analyze the access pattern of the vector to see if we can extract
      // two subvectors and do the shuffle. The analysis is done by calculating
      // the range of elements the mask access on both vectors.
-    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MinRange[2] = { static_cast<int>(SrcNumElts+1),
+                        static_cast<int>(SrcNumElts+1)};
      int MaxRange[2] = {-1, -1};
  
      for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2614,7 +2931,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
          } else {
            StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
            if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
-              StartIdx[Input] + MaskNumElts < SrcNumElts)
+              StartIdx[Input] + MaskNumElts <= SrcNumElts)
              RangeUse[Input] = 1; // Extract from a multiple of the mask length.
          }
        }
@@ -2685,13 +3002,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    const Value *Op0 = I.getOperand(0);
    const Value *Op1 = I.getOperand(1);
-  const Type *AggTy = I.getType();
-  const Type *ValTy = Op1->getType();
+  Type *AggTy = I.getType();
+  Type *ValTy = Op1->getType();
    bool IntoUndef = isa<UndefValue>(Op0);
    bool FromUndef = isa<UndefValue>(Op1);
  
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> AggValueVTs;
    ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2703,16 +3019,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    SmallVector<SDValue, 4> Values(NumAggValues);
  
    SDValue Agg = getValue(Op0);
-  SDValue Val = getValue(Op1);
    unsigned i = 0;
    // Copy the beginning value(s) from the original aggregate.
    for (; i != LinearIndex; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Agg.getNode(), Agg.getResNo() + i);
    // Copy values from the inserted value(s).
-  for (; i != LinearIndex + NumValValues; ++i)
-    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  if (NumValValues) {
+    SDValue Val = getValue(Op1);
+    for (; i != LinearIndex + NumValValues; ++i)
+      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  }
    // Copy remaining value(s) from the original aggregate.
    for (; i != NumAggValues; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
@@ -2725,17 +3043,23 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
  
  void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
    const Value *Op0 = I.getOperand(0);
-  const Type *AggTy = Op0->getType();
-  const Type *ValTy = I.getType();
+  Type *AggTy = Op0->getType();
+  Type *ValTy = I.getType();
    bool OutOfUndef = isa<UndefValue>(Op0);
  
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> ValValueVTs;
    ComputeValueVTs(TLI, ValTy, ValValueVTs);
  
    unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore a extractvalue that produces an empty object
+  if (!NumValValues) {
+    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+    return;
+  }
+
    SmallVector<SDValue, 4> Values(NumValValues);
  
    SDValue Agg = getValue(Op0);
@@ -2753,12 +3077,14 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
  
  void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
    SDValue N = getValue(I.getOperand(0));
-  const Type *Ty = I.getOperand(0)->getType();
+  // Note that the pointer operand may be a vector of pointers. Take the scalar
+  // element which holds a pointer.
+  Type *Ty = I.getOperand(0)->getType()->getScalarType();
  
    for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
         OI != E; ++OI) {
      const Value *Idx = *OI;
-    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
        unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
        if (Field) {
          // N = N + Offset
@@ -2768,11 +3094,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
        }
  
        Ty = StTy->getElementType(Field);
-    } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
-      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-      
-      // Offset canonically 0 for unions, but type changes
-      Ty = UnTy->getElementType(Field);
      } else {
        Ty = cast<SequentialType>(Ty)->getElementType();
  
@@ -2812,7 +3133,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
            unsigned Amt = ElementSize.logBase2();
            IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
                               N.getValueType(), IdxN,
-                             DAG.getConstant(Amt, TLI.getPointerTy()));
+                             DAG.getConstant(Amt, IdxN.getValueType()));
          } else {
            SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
            IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -2834,7 +3155,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
    if (FuncInfo.StaticAllocaMap.count(&I))
      return;   // getValue will auto-populate this.
  
-  const Type *Ty = I.getAllocatedType();
+  Type *Ty = I.getAllocatedType();
    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
    unsigned Align =
      std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -2853,7 +3174,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
    // Handle alignment.  If the requested alignment is less than or equal to
    // the stack alignment, ignore it.  If the size is greater than or equal to
    // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
    if (Align <= StackAlign)
      Align = 0;
  
@@ -2877,18 +3198,23 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  
    // Inform the Frame Information that we have just allocated a variable-sized
    // object.
-  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
  }
  
  void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  if (I.isAtomic())
+    return visitAtomicLoad(I);
+
    const Value *SV = I.getOperand(0);
    SDValue Ptr = getValue(SV);
  
-  const Type *Ty = I.getType();
+  Type *Ty = I.getType();
  
    bool isVolatile = I.isVolatile();
    bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  bool isInvariant = I.getMetadata("invariant.load") != 0;
    unsigned Alignment = I.getAlignment();
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
  
    SmallVector<EVT, 4> ValueVTs;
    SmallVector<uint64_t, 4> Offsets;
@@ -2899,10 +3225,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  
    SDValue Root;
    bool ConstantMemory = false;
-  if (I.isVolatile())
+  if (I.isVolatile() || NumValues > MaxParallelChains)
      // Serialize volatile loads with other side effects.
      Root = getRoot();
-  else if (AA->pointsToConstantMemory(SV)) {
+  else if (AA->pointsToConstantMemory(
+             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
      // Do not serialize (non-volatile) loads of constant memory with anything.
      Root = DAG.getEntryNode();
      ConstantMemory = true;
@@ -2912,23 +3239,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
    }
  
    SmallVector<SDValue, 4> Values(NumValues);
-  SmallVector<SDValue, 4> Chains(NumValues);
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
    EVT PtrVT = Ptr.getValueType();
-  for (unsigned i = 0; i != NumValues; ++i) {
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // Serializing loads here may result in excessive register pressure, and
+    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+    // could recover a bit by hoisting nodes upward in the chain by recognizing
+    // they are side-effect free or do not alias. The optimizer should really
+    // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as failsafe).
+    if (ChainI == MaxParallelChains) {
+      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
      SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
                              PtrVT, Ptr,
                              DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
-                            A, SV, Offsets[i], isVolatile, 
-                            isNonTemporal, Alignment);
+                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+                            isNonTemporal, isInvariant, Alignment, TBAAInfo);
  
      Values[i] = L;
-    Chains[i] = L.getValue(1);
+    Chains[ChainI] = L.getValue(1);
    }
  
    if (!ConstantMemory) {
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                                MVT::Other, &Chains[0], NumValues);
+                                MVT::Other, &Chains[0], ChainI);
      if (isVolatile)
        DAG.setRoot(Chain);
      else
@@ -2941,6 +3283,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  }
  
  void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  if (I.isAtomic())
+    return visitAtomicStore(I);
+
    const Value *SrcV = I.getOperand(0);
    const Value *PtrV = I.getOperand(1);
  
@@ -2958,67 +3303,247 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
    SDValue Ptr = getValue(PtrV);
  
    SDValue Root = getRoot();
-  SmallVector<SDValue, 4> Chains(NumValues);
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
    EVT PtrVT = Ptr.getValueType();
    bool isVolatile = I.isVolatile();
    bool isNonTemporal = I.getMetadata("nontemporal") != 0;
    unsigned Alignment = I.getAlignment();
-
-  for (unsigned i = 0; i != NumValues; ++i) {
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // See visitLoad comments.
+    if (ChainI == MaxParallelChains) {
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
                                DAG.getConstant(Offsets[i], PtrVT));
-    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
-                             SDValue(Src.getNode(), Src.getResNo() + i),
-                             Add, PtrV, Offsets[i], isVolatile, 
-                             isNonTemporal, Alignment);
-  }
+    SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+                              SDValue(Src.getNode(), Src.getResNo() + i),
+                              Add, MachinePointerInfo(PtrV, Offsets[i]),
+                              isVolatile, isNonTemporal, Alignment, TBAAInfo);
+    Chains[ChainI] = St;
+  }
+
+  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+  ++SDNodeOrder;
+  AssignOrderingToNode(StoreNode.getNode());
+  DAG.setRoot(StoreNode);
+}
  
-  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                          MVT::Other, &Chains[0], NumValues));
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+                                    SynchronizationScope Scope,
+                                    bool Before, DebugLoc dl,
+                                    SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  // Fence, if necessary
+  if (Before) {
+    if (Order == AcquireRelease || Order == SequentiallyConsistent)
+      Order = Release;
+    else if (Order == Acquire || Order == Monotonic)
+      return Chain;
+  } else {
+    if (Order == AcquireRelease)
+      Order = Acquire;
+    else if (Order == Release || Order == Monotonic)
+      return Chain;
+  }
+  SDValue Ops[3];
+  Ops[0] = Chain;
+  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
  }
  
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
-                                               unsigned Intrinsic) {
-  bool HasChain = !I.doesNotAccessMemory();
-  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+  AtomicOrdering Order = I.getOrdering();
+  SynchronizationScope Scope = I.getSynchScope();
  
-  // Build the operand list.
-  SmallVector<SDValue, 8> Ops;
-  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
-    if (OnlyLoad) {
-      // We don't need to serialize loads against other loads.
-      Ops.push_back(DAG.getRoot());
-    } else {
-      Ops.push_back(getRoot());
-    }
-  }
+  SDValue InChain = getRoot();
  
-  // Info is set by getTgtMemInstrinsic
-  TargetLowering::IntrinsicInfo Info;
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+                                   DAG, TLI);
  
-  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
-  if (!IsTgtIntrinsic)
-    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+  SDValue L =
+    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+                  getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getCompareOperand()),
+                  getValue(I.getNewValOperand()),
+                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  Scope);
+
+  SDValue OutChain = L.getValue(1);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
  
-  // Add all operands of the call to the operand list.
+  setValue(&I, L);
+  DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+  ISD::NodeType NT;
+  switch (I.getOperation()) {
+  default: llvm_unreachable("Unknown atomicrmw operation");
+  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
+  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
+  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
+  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
+  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
+  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
+  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
+  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+  }
+  AtomicOrdering Order = I.getOrdering();
+  SynchronizationScope Scope = I.getSynchScope();
+
+  SDValue InChain = getRoot();
+
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+                                   DAG, TLI);
+
+  SDValue L =
+    DAG.getAtomic(NT, dl,
+                  getValue(I.getValOperand()).getValueType().getSimpleVT(),
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getValOperand()),
+                  I.getPointerOperand(), 0 /* Alignment */,
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  Scope);
+
+  SDValue OutChain = L.getValue(1);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
+
+  setValue(&I, L);
+  DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Ops[3];
+  Ops[0] = getRoot();
+  Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+  AtomicOrdering Order = I.getOrdering();
+  SynchronizationScope Scope = I.getSynchScope();
+
+  SDValue InChain = getRoot();
+
+  EVT VT = EVT::getEVT(I.getType());
+
+  if (I.getAlignment() * 8 < VT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic load");
+
+  SDValue L =
+    DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+                  getValue(I.getPointerOperand()),
+                  I.getPointerOperand(), I.getAlignment(),
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  Scope);
+
+  SDValue OutChain = L.getValue(1);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
+
+  setValue(&I, L);
+  DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+
+  AtomicOrdering Order = I.getOrdering();
+  SynchronizationScope Scope = I.getSynchScope();
+
+  SDValue InChain = getRoot();
+
+  EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+
+  if (I.getAlignment() * 8 < VT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic store");
+
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+                                   DAG, TLI);
+
+  SDValue OutChain =
+    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getValueOperand()),
+                  I.getPointerOperand(), I.getAlignment(),
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  Scope);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
+
+  DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemInstrinsic
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+      Info.opc == ISD::INTRINSIC_W_CHAIN)
+    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
    for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
      SDValue Op = getValue(I.getArgOperand(i));
-    assert(TLI.isTypeLegal(Op.getValueType()) &&
-           "Intrinsic uses a non-legal type?");
      Ops.push_back(Op);
    }
  
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
-  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
-    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
-           "Intrinsic uses a non-legal type?");
-  }
-#endif // NDEBUG
  
    if (HasChain)
      ValueVTs.push_back(MVT::Other);
@@ -3031,7 +3556,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
      // This is target intrinsic that touches memory
      Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
                                       VTs, &Ops[0], Ops.size(),
-                                     Info.memVT, Info.ptrVal, Info.offset,
+                                     Info.memVT,
+                                   MachinePointerInfo(Info.ptrVal, Info.offset),
                                       Info.align, Info.vol,
                                       Info.readMem, Info.writeMem);
    } else if (!HasChain) {
@@ -3054,9 +3580,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
    }
  
    if (!I.getType()->isVoidTy()) {
-    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
        EVT VT = TLI.getValueType(PTy);
-      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
      }
  
      setValue(&I, Result);
@@ -3075,7 +3601,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
                             DAG.getConstant(0x007fffff, MVT::i32));
    SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                             DAG.getConstant(0x3f800000, MVT::i32));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
  }
  
  /// GetExponent - Get the exponent:
@@ -3101,25 +3627,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
    return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
  }
  
-/// Inlined utility function to implement binary input atomic intrinsics for
-/// visitIntrinsicCall: I is a call instruction
-///                     Op is the associated NodeType for I
-const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
-                                           ISD::NodeType Op) {
-  SDValue Root = getRoot();
-  SDValue L =
-    DAG.getAtomic(Op, getCurDebugLoc(),
-                  getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
-                  Root,
-                  getValue(I.getArgOperand(0)),
-                  getValue(I.getArgOperand(1)),
-                  I.getArgOperand(0));
-  setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
-  return 0;
-}
-
  // implVisitAluOverflow - Lower arithmetic overflow instrinsics.
  const char *
  SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
@@ -3174,13 +3681,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                 getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
  
        // Add the exponent into the result in integer domain.
        SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 TwoToFracPartOfX, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
        // For floating-point precision of 12:
        //
@@ -3200,13 +3707,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                 getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
  
        // Add the exponent into the result in integer domain.
        SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 TwoToFracPartOfX, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
        // For floating-point precision of 18:
        //
@@ -3238,14 +3745,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
        SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                  getF32Constant(DAG, 0x3f800000));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
                                               MVT::i32, t13);
  
        // Add the exponent into the result in integer domain.
        SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                  TwoToFracPartOfX, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
      }
    } else {
      // No special expansion.
@@ -3267,7 +3774,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
  
      // Scale the exponent by log(2) [0.69314718f].
      SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3377,7 +3884,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
  
      // Get the exponent.
      SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3486,7 +3993,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
  
      // Scale the exponent by log10(2) [0.30102999f].
      SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3614,11 +4121,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                 getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
        // For floating-point precision of 12:
@@ -3639,11 +4146,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                 getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
        // For floating-point precision of 18:
@@ -3675,11 +4182,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
        SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                  getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      }
    } else {
@@ -3747,11 +4254,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                 getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
        // For floating-point precision of 12:
@@ -3772,11 +4279,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                 getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
        // For floating-point precision of 18:
@@ -3808,11 +4315,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
        SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                  getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
        SDValue TwoToFractionalPartOfX =
          DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
  
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                             MVT::f32, TwoToFractionalPartOfX);
      }
    } else {
@@ -3879,30 +4386,55 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
    return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
  }
  
+// getTruncatedArgReg - Find underlying register used for an truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+  if (N.getOpcode() != ISD::TRUNCATE)
+    return 0;
+
+  const SDValue &Ext = N.getOperand(0);
+  if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+    const SDValue &CFR = Ext.getOperand(0);
+    if (CFR.getOpcode() == ISD::CopyFromReg)
+      return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+    else
+      if (CFR.getOpcode() == ISD::TRUNCATE)
+        return getTruncatedArgReg(CFR);
+  }
+  return 0;
+}
+
  /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
  /// argument, create the corresponding DBG_VALUE machine instruction for it now.
  /// At the end of instruction selection, they will be inserted to the entry BB.
  bool
-SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
-                                              const Value *V, MDNode *Variable,
-                                              uint64_t Offset,
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                              int64_t Offset,
                                                const SDValue &N) {
-  if (!isa<Argument>(V))
+  const Argument *Arg = dyn_cast<Argument>(V);
+  if (!Arg)
      return false;
  
    MachineFunction &MF = DAG.getMachineFunction();
+  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
    // Ignore inlined function arguments here.
    DIVariable DV(Variable);
    if (DV.isInlinedFnArgument(MF.getFunction()))
      return false;
  
-  MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()];
-  if (MBB != &MF.front())
-    return false;
-
    unsigned Reg = 0;
-  if (N.getOpcode() == ISD::CopyFromReg) {
-    Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+  // Some arguments' frame index is recorded during argument lowering.
+  Offset = FuncInfo.getArgumentFrameIndex(Arg);
+  if (Offset)
+      Reg = TRI->getFrameRegister(MF);
+
+  if (!Reg && N.getNode()) {
+    if (N.getOpcode() == ISD::CopyFromReg)
+      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+    else
+      Reg = getTruncatedArgReg(N);
      if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned PR = RegInfo.getLiveInPhysReg(Reg);
@@ -3912,13 +4444,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
    }
  
    if (!Reg) {
+    // Check if ValueMap has reg number.
      DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
-    if (VMI == FuncInfo.ValueMap.end())
-      return false;
-    Reg = VMI->second;
+    if (VMI != FuncInfo.ValueMap.end())
+      Reg = VMI->second;
    }
  
-  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+  if (!Reg && N.getNode()) {
+    // Check if frame index is available.
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+      if (FrameIndexSDNode *FINode =
+          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+        Reg = TRI->getFrameRegister(MF);
+        Offset = FINode->getIndex();
+      }
+  }
+
+  if (!Reg)
+    return false;
+
    MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
                                      TII->get(TargetOpcode::DBG_VALUE))
      .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
@@ -3927,9 +4471,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
  }
  
  // VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
  #endif
  
  /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
@@ -3957,9 +4503,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(0))));
      return 0;
    case Intrinsic::setjmp:
-    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
    case Intrinsic::longjmp:
-    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
    case Intrinsic::memcpy: {
      // Assert for address < 256 since we support only user defined address
      // spaces.
@@ -3974,7 +4520,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
-                              I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                              MachinePointerInfo(I.getArgOperand(0)),
+                              MachinePointerInfo(I.getArgOperand(1))));
      return 0;
    }
    case Intrinsic::memset: {
@@ -3989,7 +4536,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
      DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
-                              I.getArgOperand(0), 0));
+                              MachinePointerInfo(I.getArgOperand(0))));
      return 0;
    }
    case Intrinsic::memmove: {
@@ -4005,51 +4552,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      SDValue Op3 = getValue(I.getArgOperand(2));
      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-
-    // If the source and destination are known to not be aliases, we can
-    // lower memmove as memcpy.
-    uint64_t Size = -1ULL;
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
-      Size = C->getZExtValue();
-    if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
-        AliasAnalysis::NoAlias) {
-      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
-                                false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
-      return 0;
-    }
-
      DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
-                               I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                               MachinePointerInfo(I.getArgOperand(0)),
+                               MachinePointerInfo(I.getArgOperand(1))));
      return 0;
    }
    case Intrinsic::dbg_declare: {
      const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
-    if (!DIVariable(DI.getVariable()).Verify())
-      return 0;
-
      MDNode *Variable = DI.getVariable();
-    // Parameters are handled specially.
-    bool isParameter = 
-      DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
      const Value *Address = DI.getAddress();
-    if (!Address)
+    if (!Address || !DIVariable(Variable).Verify()) {
+      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
        return 0;
-    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
-      Address = BCI->getOperand(0);
-    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-    if (AI) {
-      // Don't handle byval arguments or VLAs, for example.
-      // Non-byval arguments are handled here (they refer to the stack temporary
-      // alloca at this point).
-      DenseMap<const AllocaInst*, int>::iterator SI =
-        FuncInfo.StaticAllocaMap.find(AI);
-      if (SI == FuncInfo.StaticAllocaMap.end())
-        return 0; // VLAs.
-      int FI = SI->second;
-
-      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
-        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
      }
  
      // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
@@ -4057,31 +4571,72 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      // absolute, but not relative, values are different depending on whether
      // debug info exists.
      ++SDNodeOrder;
+
+    // Check if address has undef value.
+    if (isa<UndefValue>(Address) ||
+        (Address->use_empty() && !isa<Argument>(Address))) {
+      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+      return 0;
+    }
+
      SDValue &N = NodeMap[Address];
+    if (!N.getNode() && isa<Argument>(Address))
+      // Check unused arguments map.
+      N = UnusedArgNodeMap[Address];
      SDDbgValue *SDV;
      if (N.getNode()) {
+      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+        Address = BCI->getOperand(0);
+      // Parameters are handled specially.
+      bool isParameter =
+        (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+         isa<Argument>(Address));
+
+      const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
        if (isParameter && !AI) {
          FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
          if (FINode)
            // Byval parameter.  We have a frame index at this point.
            SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
                                  0, dl, SDNodeOrder);
-        else
-          // Can't do anything with other non-AI cases yet.  This might be a
-          // parameter of a callee function that got inlined, for example.
+        else {
+          // Address is an argument, so try to emit its dbg value using
+          // virtual register info from the FuncInfo.ValueMap.
+          EmitFuncArgumentDbgValue(Address, Variable, 0, N);
            return 0;
+        }
        } else if (AI)
          SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
                                0, dl, SDNodeOrder);
-      else
+      else {
          // Can't do anything with other non-AI cases yet.
+        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+        DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+        DEBUG(Address->dump());
          return 0;
+      }
        DAG.AddDbgValue(SDV, N.getNode(), isParameter);
      } else {
-      // This isn't useful, but it shows what we're missing.
-      SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
-                            0, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, isParameter);
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap.
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        // If variable is pinned by a alloca in dominating bb then
+        // use StaticAllocaMap.
+        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+          if (AI->getParent() != DI.getParent()) {
+            DenseMap<const AllocaInst*, int>::iterator SI =
+              FuncInfo.StaticAllocaMap.find(AI);
+            if (SI != FuncInfo.StaticAllocaMap.end()) {
+              SDV = DAG.getDbgValue(Variable, SI->second,
+                                    0, dl, SDNodeOrder);
+              DAG.AddDbgValue(SDV, 0, false);
+              return 0;
+            }
+          }
+        }
+        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+      }
      }
      return 0;
    }
@@ -4102,40 +4657,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      // debug info exists.
      ++SDNodeOrder;
      SDDbgValue *SDV;
-    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
        SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
        DAG.AddDbgValue(SDV, 0, false);
      } else {
-      bool createUndef = false;
-      // FIXME : Why not use getValue() directly ?
+      // Do not use getValue() in here; we don't want to generate code at
+      // this point if it hasn't been done yet.
        SDValue N = NodeMap[V];
        if (!N.getNode() && isa<Argument>(V))
          // Check unused arguments map.
          N = UnusedArgNodeMap[V];
        if (N.getNode()) {
-        if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
            SDV = DAG.getDbgValue(Variable, N.getNode(),
                                  N.getResNo(), Offset, dl, SDNodeOrder);
            DAG.AddDbgValue(SDV, N.getNode(), false);
          }
-      } else if (isa<PHINode>(V) && !V->use_empty()) {
-        SDValue N = getValue(V);
-        if (N.getNode()) {
-          if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
-            SDV = DAG.getDbgValue(Variable, N.getNode(),
-                                  N.getResNo(), Offset, dl, SDNodeOrder);
-            DAG.AddDbgValue(SDV, N.getNode(), false);
-          }
-        } else
-          createUndef = true;
-      } else
-        createUndef = true;
-      if (createUndef) {
+      } else if (!V->use_empty() ) {
+        // Do not call getValue(V) yet, as we don't want to generate code.
+        // Remember it for later.
+        DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+        DanglingDebugInfoMap[V] = DDI;
+      } else {
          // We may expand this to cover more cases.  One case where we have no
-        // data available is an unreferenced parameter; we need this fallback.
-        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
-                              Offset, dl, SDNodeOrder);
-        DAG.AddDbgValue(SDV, 0, false);
+        // data available is an unreferenced parameter.
+        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
        }
      }
  
@@ -4151,49 +4697,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      if (SI == FuncInfo.StaticAllocaMap.end())
        return 0; // VLAs.
      int FI = SI->second;
-    
+
      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
      return 0;
    }
-  case Intrinsic::eh_exception: {
-    // Insert the EXCEPTIONADDR instruction.
-    assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() &&
-           "Call to eh.exception not in landing pad!");
-    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
-    SDValue Ops[1];
-    Ops[0] = DAG.getRoot();
-    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
-    setValue(&I, Op);
-    DAG.setRoot(Op.getValue(1));
-    return 0;
-  }
-
-  case Intrinsic::eh_selector: {
-    MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()];
-    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-    if (CallMBB->isLandingPad())
-      AddCatchInfo(I, &MMI, CallMBB);
-    else {
-#ifndef NDEBUG
-      FuncInfo.CatchInfoLost.insert(&I);
-#endif
-      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
-      unsigned Reg = TLI.getExceptionSelectorRegister();
-      if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg);
-    }
-
-    // Insert the EHSELECTION instruction.
-    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
-    SDValue Ops[2];
-    Ops[0] = getValue(I.getArgOperand(0));
-    Ops[1] = getRoot();
-    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
-    DAG.setRoot(Op.getValue(1));
-    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
-    return 0;
-  }
  
    case Intrinsic::eh_typeid_for: {
      // Find the type id for the given typeinfo.
@@ -4240,18 +4749,107 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      MMI.setCurrentCallSite(CI->getZExtValue());
      return 0;
    }
+  case Intrinsic::eh_sjlj_functioncontext: {
+    // Get and store the index of the function context.
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    AllocaInst *FnCtx =
+      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+    int FI = FuncInfo.StaticAllocaMap[FnCtx];
+    MFI->setFunctionContextIndex(FI);
+    return 0;
+  }
    case Intrinsic::eh_sjlj_setjmp: {
-    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
-                             getValue(I.getArgOperand(0))));
+    SDValue Ops[2];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+                             DAG.getVTList(MVT::i32, MVT::Other),
+                             Ops, 2);
+    setValue(&I, Op.getValue(0));
+    DAG.setRoot(Op.getValue(1));
      return 0;
    }
    case Intrinsic::eh_sjlj_longjmp: {
      DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
-                            getRoot(),
-                            getValue(I.getArgOperand(0))));
+                            getRoot(), getValue(I.getArgOperand(0))));
      return 0;
    }
  
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = getValue(I.getArgOperand(1));
+    if (isa<ConstantSDNode>(ShAmt)) {
+      visitTargetIntrinsic(I, Intrinsic);
+      return 0;
+    }
+    unsigned NewIntrinsic = 0;
+    EVT ShAmtVT = MVT::v2i32;
+    switch (Intrinsic) {
+    case Intrinsic::x86_mmx_pslli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+      break;
+    case Intrinsic::x86_mmx_pslli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+      break;
+    case Intrinsic::x86_mmx_pslli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+      break;
+    case Intrinsic::x86_mmx_psrli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+      break;
+    case Intrinsic::x86_mmx_psrli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+      break;
+    case Intrinsic::x86_mmx_psrli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+      break;
+    case Intrinsic::x86_mmx_psrai_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+      break;
+    case Intrinsic::x86_mmx_psrai_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+      break;
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    }
+
+    // The vector shift intrinsics with scalars uses 32b shift amounts but
+    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
+    // to be zero.
+    // We must do this early because v2i32 is not a legal type.
+    DebugLoc dl = getCurDebugLoc();
+    SDValue ShOps[2];
+    ShOps[0] = ShAmt;
+    ShOps[1] = DAG.getConstant(0, MVT::i32);
+    ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+    EVT DestVT = TLI.getValueType(I.getType());
+    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                       DAG.getConstant(NewIntrinsic, MVT::i32),
+                       getValue(I.getArgOperand(0)), ShAmt);
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::x86_avx_vinsertf128_pd_256:
+  case Intrinsic::x86_avx_vinsertf128_ps_256:
+  case Intrinsic::x86_avx_vinsertf128_si_256: {
+    DebugLoc dl = getCurDebugLoc();
+    EVT DestVT = TLI.getValueType(I.getType());
+    EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+                   ElVT.getVectorNumElements();
+    Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+                      getValue(I.getArgOperand(0)),
+                      getValue(I.getArgOperand(1)),
+                      DAG.getConstant(Idx, MVT::i32));
+    setValue(&I, Res);
+    return 0;
+  }
    case Intrinsic::convertff:
    case Intrinsic::convertfsi:
    case Intrinsic::convertfui:
@@ -4321,6 +4919,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::pow:
      visitPow(I);
      return 0;
+  case Intrinsic::fma:
+    setValue(&I, DAG.getNode(ISD::FMA, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)),
+                             getValue(I.getArgOperand(2))));
+    return 0;
    case Intrinsic::convert_to_fp16:
      setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
                               MVT::i16, getValue(I.getArgOperand(0))));
@@ -4350,14 +4955,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return 0;
    case Intrinsic::cttz: {
      SDValue Arg = getValue(I.getArgOperand(0));
+    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
      EVT Ty = Arg.getValueType();
-    setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+                             dl, Ty, Arg));
      return 0;
    }
    case Intrinsic::ctlz: {
      SDValue Arg = getValue(I.getArgOperand(0));
+    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
      EVT Ty = Arg.getValueType();
-    setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+                             dl, Ty, Arg));
      return 0;
    }
    case Intrinsic::ctpop: {
@@ -4395,8 +5004,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  
      // Store the stack protector onto the stack.
      Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
-                       PseudoSourceValue::getFixedStack(FI),
-                       0, true, false, 0);
+                       MachinePointerInfo::getFixedStack(FI),
+                       true, false, 0);
      setValue(&I, Res);
      DAG.setRoot(Res);
      return 0;
@@ -4433,12 +5042,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
      Ops[5] = DAG.getSrcValue(F);
  
-    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
-                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
-                      Ops, 6);
+    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
  
-    setValue(&I, Res);
-    DAG.setRoot(Res.getValue(1));
+    DAG.setRoot(Res);
+    return 0;
+  }
+  case Intrinsic::adjust_trampoline: {
+    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+                             TLI.getPointerTy(),
+                             getValue(I.getArgOperand(0))));
      return 0;
    }
    case Intrinsic::gcroot:
@@ -4453,13 +5065,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::gcread:
    case Intrinsic::gcwrite:
      llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
-    return 0;
    case Intrinsic::flt_rounds:
      setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
      return 0;
-  case Intrinsic::trap:
-    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+
+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with EXP.
+    setValue(&I, getValue(I.getArgOperand(0)));
+    return 0;
+  }
+
+  case Intrinsic::trap: {
+    StringRef TrapFuncName = TM.Options.getTrapFunctionName();
+    if (TrapFuncName.empty()) {
+      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+      return 0;
+    }
+    TargetLowering::ArgListTy Args;
+    std::pair<SDValue, SDValue> Result =
+      TLI.LowerCallTo(getRoot(), I.getType(),
+                 false, false, false, false, 0, CallingConv::C,
+                 /*isTailCall=*/false,
+                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+                 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+                 Args, DAG, getCurDebugLoc());
+    DAG.setRoot(Result.second);
      return 0;
+  }
    case Intrinsic::uadd_with_overflow:
      return implVisitAluOverflow(I, ISD::UADDO);
    case Intrinsic::sadd_with_overflow:
@@ -4474,60 +5106,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return implVisitAluOverflow(I, ISD::SMULO);
  
    case Intrinsic::prefetch: {
-    SDValue Ops[4];
+    SDValue Ops[5];
+    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
      Ops[0] = getRoot();
      Ops[1] = getValue(I.getArgOperand(0));
      Ops[2] = getValue(I.getArgOperand(1));
      Ops[3] = getValue(I.getArgOperand(2));
-    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
-    return 0;
-  }
-
-  case Intrinsic::memory_barrier: {
-    SDValue Ops[6];
-    Ops[0] = getRoot();
-    for (int x = 1; x < 6; ++x)
-      Ops[x] = getValue(I.getArgOperand(x - 1));
-
-    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
-    return 0;
-  }
-  case Intrinsic::atomic_cmp_swap: {
-    SDValue Root = getRoot();
-    SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
-                    getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
-                    Root,
-                    getValue(I.getArgOperand(0)),
-                    getValue(I.getArgOperand(1)),
-                    getValue(I.getArgOperand(2)),
-                    I.getArgOperand(0));
-    setValue(&I, L);
-    DAG.setRoot(L.getValue(1));
+    Ops[4] = getValue(I.getArgOperand(3));
+    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+                                        DAG.getVTList(MVT::Other),
+                                        &Ops[0], 5,
+                                        EVT::getIntegerVT(*Context, 8),
+                                        MachinePointerInfo(I.getArgOperand(0)),
+                                        0, /* align */
+                                        false, /* volatile */
+                                        rw==0, /* read */
+                                        rw==1)); /* write */
      return 0;
    }
-  case Intrinsic::atomic_load_add:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
-  case Intrinsic::atomic_load_sub:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
-  case Intrinsic::atomic_load_or:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
-  case Intrinsic::atomic_load_xor:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
-  case Intrinsic::atomic_load_and:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
-  case Intrinsic::atomic_load_nand:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
-  case Intrinsic::atomic_load_max:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
-  case Intrinsic::atomic_load_min:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
-  case Intrinsic::atomic_load_umin:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
-  case Intrinsic::atomic_load_umax:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
-  case Intrinsic::atomic_swap:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
  
    case Intrinsic::invariant_start:
    case Intrinsic::lifetime_start:
@@ -4544,9 +5140,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                        bool isTailCall,
                                        MachineBasicBlock *LandingPad) {
-  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  const Type *RetTy = FTy->getReturnType();
+  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  Type *RetTy = FTy->getReturnType();
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    MCSymbol *BeginLabel = 0;
  
@@ -4555,16 +5151,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
    Args.reserve(CS.arg_size());
  
    // Check whether the function can return without sret-demotion.
-  SmallVector<EVT, 4> OutVTs;
-  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+  SmallVector<ISD::OutputArg, 4> Outs;
    SmallVector<uint64_t, 4> Offsets;
-  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
-                OutVTs, OutsFlags, TLI, &Offsets);
+  GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+                Outs, TLI, &Offsets);
  
    bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
-                        FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+                                          DAG.getMachineFunction(),
+                                          FTy->isVarArg(), Outs,
+                                          FTy->getContext());
  
    SDValue DemoteStackSlot;
+  int DemoteStackIdx = -100;
  
    if (!CanLowerReturn) {
      uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4572,10 +5170,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
                        FTy->getReturnType());
      MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
  
-    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
      Entry.Node = DemoteStackSlot;
      Entry.Ty = StackSlotPtrType;
      Entry.isSExt = false;
@@ -4591,8 +5189,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
  
    for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
         i != e; ++i) {
-    SDValue ArgNode = getValue(*i);
-    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+    const Value *V = *i;
+
+    // Skip empty types
+    if (V->getType()->isEmptyTy())
+      continue;
+
+    SDValue ArgNode = getValue(V);
+    Entry.Node = ArgNode; Entry.Ty = V->getType();
  
      unsigned attrInd = i - CS.arg_begin() + 1;
      Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
@@ -4615,6 +5219,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      unsigned CallSiteIndex = MMI.getCurrentCallSite();
      if (CallSiteIndex) {
        MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
        // Now that the call site is handled, stop tracking it.
        MMI.setCurrentCallSite(0);
      }
@@ -4631,6 +5237,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
        !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
      isTailCall = false;
  
+  // If there's a possibility that fast-isel has already selected some amount
+  // of the current basic block, don't emit a tail call.
+  if (isTailCall && TM.Options.EnableFastISel)
+    isTailCall = false;
+
    std::pair<SDValue,SDValue> Result =
      TLI.LowerCallTo(getRoot(), RetTy,
                      CS.paramHasAttr(0, Attribute::SExt),
@@ -4638,6 +5249,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                      CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
                      CS.getCallingConv(),
                      isTailCall,
+                    CS.doesNotReturn(),
                      !CS.getInstruction()->use_empty(),
                      Callee, Args, DAG, getCurDebugLoc());
    assert((isTailCall || Result.second.getNode()) &&
@@ -4650,12 +5262,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      // The instruction result is the result of loading from the
      // hidden sret parameter.
      SmallVector<EVT, 1> PVTs;
-    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
  
      ComputeValueVTs(TLI, PtrRetTy, PVTs);
      assert(PVTs.size() == 1 && "Pointers should fit in one register");
      EVT PtrVT = PVTs[0];
-    unsigned NumValues = OutVTs.size();
+    unsigned NumValues = Outs.size();
      SmallVector<SDValue, 4> Values(NumValues);
      SmallVector<SDValue, 4> Chains(NumValues);
  
@@ -4663,8 +5275,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
        SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
                                  DemoteStackSlot,
                                  DAG.getConstant(Offsets[i], PtrVT));
-      SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
-                              Add, NULL, Offsets[i], false, false, 1);
+      SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
+                              Add,
+                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+                              false, false, false, 1);
        Values[i] = L;
        Chains[i] = L.getValue(1);
      }
@@ -4672,7 +5286,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                                  MVT::Other, &Chains[0], NumValues);
      PendingLoads.push_back(Chain);
-    
+
      // Collect the legal value parts into potentially illegal values
      // that correspond to the original function's return values.
      SmallVector<EVT, 4> RetTys;
@@ -4685,7 +5299,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
        EVT VT = RetTys[I];
        EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
        unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-  
+
        SDValue ReturnValue =
          getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
                           RegisterVT, VT, AssertOp);
@@ -4697,15 +5311,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
               DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                           DAG.getVTList(&RetTys[0], RetTys.size()),
                           &ReturnValues[0], ReturnValues.size()));
-
    }
  
-  // As a special case, a null chain means that a tail call has been emitted and
-  // the DAG root is already updated.
-  if (Result.second.getNode())
-    DAG.setRoot(Result.second);
-  else
+  // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to a SDNode and visit() will not assign it an order number.
+  if (!Result.second.getNode()) {
+    // As a special case, a null chain means that a tail call has been emitted and
+    // the DAG root is already updated.
      HasTailCall = true;
+    ++SDNodeOrder;
+    AssignOrderingToNode(DAG.getRoot().getNode());
+  } else {
+    DAG.setRoot(Result.second);
+    ++SDNodeOrder;
+    AssignOrderingToNode(Result.second.getNode());
+  }
  
    if (LandingPad) {
      // Insert a label at the end of the invoke call to mark the try range.  This
@@ -4735,7 +5355,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
  }
  
  static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
-                             const Type *LoadTy,
+                             Type *LoadTy,
                               SelectionDAGBuilder &Builder) {
  
    // Check to see if this load can be trivially constant folded, e.g. if the
@@ -4767,9 +5387,10 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
  
    SDValue Ptr = Builder.getValue(PtrVal);
    SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
-                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+                                        Ptr, MachinePointerInfo(PtrVal),
                                          false /*volatile*/,
-                                        false /*nontemporal*/, 1 /* align=1 */);
+                                        false /*nontemporal*/, 
+                                        false /*isinvariant*/, 1 /* align=1 */);
  
    if (!ConstantMemory)
      Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -4798,7 +5419,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
    if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
      bool ActuallyDoIt = true;
      MVT LoadVT;
-    const Type *LoadTy;
+    Type *LoadTy;
      switch (Size->getZExtValue()) {
      default:
        LoadVT = MVT::Other;
@@ -4863,7 +5484,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
      visitInlineAsm(&I);
      return;
    }
-  
+
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  ComputeUsesVAFloatArgument(I, &MMI);
+
    const char *RenameFn = 0;
    if (Function *F = I.getCalledFunction()) {
      if (F->isDeclaration()) {
@@ -4885,7 +5509,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
      // can't be a library call.
      if (!F->hasLocalLinkage() && F->hasName()) {
        StringRef Name = F->getName();
-      if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+      if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+          (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+          (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
          if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType() &&
@@ -4896,7 +5522,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
                                     LHS.getValueType(), LHS, RHS));
            return;
          }
-      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+      } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+                 (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+                 (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
          if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType()) {
@@ -4905,7 +5533,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
                                     Tmp.getValueType(), Tmp));
            return;
          }
-      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+      } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+                 (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+                 (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
          if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType() &&
@@ -4915,7 +5545,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
                                     Tmp.getValueType(), Tmp));
            return;
          }
-      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+      } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+                 (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+                 (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
          if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType() &&
@@ -4925,7 +5557,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
                                     Tmp.getValueType(), Tmp));
            return;
          }
-      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+      } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+                 (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+                 (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
          if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType() &&
@@ -4935,13 +5569,92 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
                                     Tmp.getValueType(), Tmp));
            return;
          }
+      } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+                 (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+                 (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+                 (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+                 (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+                 (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+                 (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+                 (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+                 (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+                 (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+                 (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+                 (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+                 (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+                 (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+                 (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
        } else if (Name == "memcmp") {
          if (visitMemCmpCall(I))
            return;
        }
      }
    }
-  
+
    SDValue Callee;
    if (!RenameFn)
      Callee = getValue(I.getCalledValue());
@@ -4953,12 +5666,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
    LowerCallTo(&I, Callee, I.isTailCall());
  }
  
-namespace llvm {
+namespace {
  
  /// AsmOperandInfo - This contains information for each constraint that we are
  /// lowering.
-class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
-    public TargetLowering::AsmOperandInfo {
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
  public:
    /// CallOperand - If this is the result output operand or a clobber
    /// this is null, otherwise it is the incoming operand to the CallInst.
@@ -4969,26 +5681,10 @@ public:
    /// contains the set of register corresponding to the operand.
    RegsForValue AssignedRegs;
  
-  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
      : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
    }
  
-  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
-  /// busy in OutputRegs/InputRegs.
-  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
-                         std::set<unsigned> &OutputRegs,
-                         std::set<unsigned> &InputRegs,
-                         const TargetRegisterInfo &TRI) const {
-    if (isOutReg) {
-      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
-        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
-    }
-    if (isInReg) {
-      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
-        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
-    }
-  }
-
    /// getCallOperandValEVT - Return the EVT of the Value* that this operand
    /// corresponds to.  If there is no Value* for this operand, it returns
    /// MVT::Other.
@@ -5000,17 +5696,23 @@ public:
      if (isa<BasicBlock>(CallOperandVal))
        return TLI.getPointerTy();
  
-    const llvm::Type *OpTy = CallOperandVal->getType();
+    llvm::Type *OpTy = CallOperandVal->getType();
  
+    // FIXME: code duplicated from TargetLowering::ParseConstraints().
      // If this is an indirect operand, the operand is a pointer to the
      // accessed type.
      if (isIndirect) {
-      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }
  
+    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+    if (StructType *STy = dyn_cast<StructType>(OpTy))
+      if (STy->getNumElements() == 1)
+        OpTy = STy->getElementType(0);
+
      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
@@ -5030,69 +5732,11 @@ public:
  
      return TLI.getValueType(OpTy, true);
    }
-
-private:
-  /// MarkRegAndAliases - Mark the specified register and all aliases in the
-  /// specified set.
-  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
-                                const TargetRegisterInfo &TRI) {
-    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
-    Regs.insert(Reg);
-    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
-      for (; *Aliases; ++Aliases)
-        Regs.insert(*Aliases);
-  }
  };
  
-} // end llvm namespace.
-
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register.  Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
-                      const TargetLowering &TLI,
-                      const TargetRegisterInfo *TRI) {
-  EVT FoundVT = MVT::Other;
-  const TargetRegisterClass *FoundRC = 0;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
-       E = TRI->regclass_end(); RCI != E; ++RCI) {
-    EVT ThisVT = MVT::Other;
-
-    const TargetRegisterClass *RC = *RCI;
-    // If none of the value types for this register class are valid, we
-    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
-    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-         I != E; ++I) {
-      if (TLI.isTypeLegal(*I)) {
-        // If we have already found this register in a different register class,
-        // choose the one with the largest VT specified.  For example, on
-        // PowerPC, we favor f64 register classes over f32.
-        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
-          ThisVT = *I;
-          break;
-        }
-      }
-    }
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
  
-    if (ThisVT == MVT::Other) continue;
-
-    // NOTE: This isn't ideal.  In particular, this might allocate the
-    // frame pointer in functions that need it (due to them not being taken
-    // out of allocation, because a variable sized allocation hasn't been seen
-    // yet).  This is a slight code pessimization, but should still work.
-    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-      if (*I == Reg) {
-        // We found a matching register class.  Keep looking at others in case
-        // we find one with larger registers that this physreg is also in.
-        FoundRC = RC;
-        FoundVT = ThisVT;
-        break;
-      }
-  }
-  return FoundRC;
-}
+} // end anonymous namespace
  
  /// GetRegistersForValue - Assign registers (virtual or physical) for the
  /// specified operand.  We prefer to assign virtual registers, to allow the
@@ -5101,36 +5745,12 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
  /// allocation.  This produces generally horrible, but correct, code.
  ///
  ///   OpInfo describes the operand.
-///   Input and OutputRegs are the set of already allocated physical registers.
  ///
-void SelectionDAGBuilder::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                     std::set<unsigned> &OutputRegs,
-                     std::set<unsigned> &InputRegs) {
-  LLVMContext &Context = FuncInfo.Fn->getContext();
-
-  // Compute whether this value requires an input register, an output register,
-  // or both.
-  bool isOutReg = false;
-  bool isInReg = false;
-  switch (OpInfo.Type) {
-  case InlineAsm::isOutput:
-    isOutReg = true;
-
-    // If there is an input constraint that matches this, we need to reserve
-    // the input register so no other inputs allocate to it.
-    isInReg = OpInfo.hasMatchingInput();
-    break;
-  case InlineAsm::isInput:
-    isInReg = true;
-    isOutReg = false;
-    break;
-  case InlineAsm::isClobber:
-    isOutReg = true;
-    isInReg = true;
-    break;
-  }
-
+static void GetRegistersForValue(SelectionDAG &DAG,
+                                 const TargetLowering &TLI,
+                                 DebugLoc DL,
+                                 SDISelAsmOperandInfo &OpInfo) {
+  LLVMContext &Context = *DAG.getContext();
  
    MachineFunction &MF = DAG.getMachineFunction();
    SmallVector<unsigned, 4> Regs;
@@ -5153,7 +5773,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
        // vector types).
        EVT RegVT = *PhysReg.second->vt_begin();
        if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                           RegVT, OpInfo.CallOperand);
          OpInfo.ConstraintVT = RegVT;
        } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5163,7 +5783,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
          // machine.
          RegVT = EVT::getIntegerVT(Context,
                                    OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                           RegVT, OpInfo.CallOperand);
          OpInfo.ConstraintVT = RegVT;
        }
@@ -5205,8 +5825,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
      }
  
      OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
-    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
      return;
    }
  
@@ -5226,52 +5844,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
      return;
    }
  
-  // This is a reference to a register class that doesn't directly correspond
-  // to an LLVM register class.  Allocate NumRegs consecutive, available,
-  // registers from the class.
-  std::vector<unsigned> RegClassRegs
-    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                            OpInfo.ConstraintVT);
-
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-  unsigned NumAllocated = 0;
-  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
-    unsigned Reg = RegClassRegs[i];
-    // See if this register is available.
-    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
-        (isInReg  && InputRegs.count(Reg))) {    // Already used.
-      // Make sure we find consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Check to see if this register is allocatable (i.e. don't give out the
-    // stack pointer).
-    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
-    if (!RC) {        // Couldn't allocate this register.
-      // Reset NumAllocated to make sure we return consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Okay, this register is good, we can use it.
-    ++NumAllocated;
-
-    // If we allocated enough consecutive registers, succeed.
-    if (NumAllocated == NumRegs) {
-      unsigned RegStart = (i-NumAllocated)+1;
-      unsigned RegEnd   = i+1;
-      // Mark all of the allocated registers used.
-      for (unsigned i = RegStart; i != RegEnd; ++i)
-        Regs.push_back(RegClassRegs[i]);
-
-      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
-                                         OpInfo.ConstraintVT);
-      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-      return;
-    }
-  }
-
    // Otherwise, we couldn't allocate enough registers for this.
  }
  
@@ -5281,30 +5853,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
    const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  
    /// ConstraintOperands - Information about all of the constraints.
-  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
-
-  std::set<unsigned> OutputRegs, InputRegs;
+  SDISelAsmOperandInfoVector ConstraintOperands;
  
-  // Do a prepass over the constraints, canonicalizing them, and building up the
-  // ConstraintOperands list.
-  std::vector<InlineAsm::ConstraintInfo>
-    ConstraintInfos = IA->ParseConstraints();
+  TargetLowering::AsmOperandInfoVector
+    TargetConstraints = TLI.ParseConstraints(CS);
  
-  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-
-  SDValue Chain, Flag;
-
-  // We won't need to flush pending loads if this asm doesn't touch
-  // memory and is nonvolatile.
-  if (hasMemory || IA->hasSideEffects())
-    Chain = getRoot();
-  else
-    Chain = DAG.getRoot();
+  bool hasMemory = false;
  
    unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
    unsigned ResNo = 0;   // ResNo - The result number of the next output.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
      SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
  
      EVT OpVT = MVT::Other;
@@ -5320,9 +5879,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
        // The return value of the call is this value.  As such, there is no
        // corresponding argument.
-      assert(!CS.getType()->isVoidTy() &&
-             "Bad inline asm!");
-      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
          OpVT = TLI.getValueType(STy->getElementType(ResNo));
        } else {
          assert(ResNo == 0 && "Asm only has one result!");
@@ -5341,9 +5899,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // If this is an input or an indirect output, process the call argument.
      // BasicBlocks are labels, currently appearing only in asm's.
      if (OpInfo.CallOperandVal) {
-      // Strip bitcasts, if any.  This mostly comes up for functions.
-      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
        if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
          OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
        } else {
@@ -5354,11 +5909,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      }
  
      OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operand accesses access memory.
+    if (OpInfo.isIndirect)
+      hasMemory = true;
+    else {
+      for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+        TargetLowering::ConstraintType
+          CType = TLI.getConstraintType(OpInfo.Codes[j]);
+        if (CType == TargetLowering::C_Memory) {
+          hasMemory = true;
+          break;
+        }
+      }
+    }
    }
  
+  SDValue Chain, Flag;
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
    // Second pass over the constraints: compute which constraint option to use
    // and assign registers to constraints that want a specific physreg.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
      SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
  
      // If this is an output operand with a matching input operand, look up the
@@ -5367,12 +5945,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // error.
      if (OpInfo.hasMatchingInput()) {
        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-      
+
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+       std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+         TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                           OpInfo.ConstraintVT);
+       std::pair<unsigned, const TargetRegisterClass*> InputRC =
+         TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+                                           Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
               Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
            report_fatal_error("Unsupported asm: input constraint"
                               " with a matching output constraint of"
                               " incompatible type!");
@@ -5388,31 +5971,37 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // need to to provide an address for the memory input.
      if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
          !OpInfo.isIndirect) {
-      assert(OpInfo.Type == InlineAsm::isInput &&
+      assert((OpInfo.isMultipleAlternative ||
+              (OpInfo.Type == InlineAsm::isInput)) &&
               "Can only indirectify direct input operands!");
  
        // Memory operands really want the address of the value.  If we don't have
        // an indirect input, put it in the constpool if we can, otherwise spill
        // it to a stack slot.
+      // TODO: This isn't quite right. We need to handle these according to
+      // the addressing mode that the constraint wants. Also, this may take
+      // an additional register for the computation and we don't want that
+      // either.
  
        // If the operand is a float, integer, or vector constant, spill to a
        // constant pool entry to get its address.
        const Value *OpVal = OpInfo.CallOperandVal;
        if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
-          isa<ConstantVector>(OpVal)) {
+          isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
          OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
                                                   TLI.getPointerTy());
        } else {
          // Otherwise, create a stack slot and emit a store to it before the
          // asm.
-        const Type *Ty = OpVal->getType();
+        Type *Ty = OpVal->getType();
          uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
          unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
          SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
          Chain = DAG.getStore(Chain, getCurDebugLoc(),
-                             OpInfo.CallOperand, StackSlot, NULL, 0,
+                             OpInfo.CallOperand, StackSlot,
+                             MachinePointerInfo::getFixedStack(SSFI),
                               false, false, 0);
          OpInfo.CallOperand = StackSlot;
        }
@@ -5427,11 +6016,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // If this constraint is for a specific register, allocate it before
      // anything else.
      if (OpInfo.ConstraintType == TargetLowering::C_Register)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
    }
  
-  ConstraintInfos.clear();
-
    // Second pass - Loop over all of the operands, assigning virtual or physregs
    // to register class operands.
    for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
@@ -5440,7 +6027,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // C_Register operands have already been allocated, Other/Memory don't need
      // to be.
      if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
    }
  
    // AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -5456,9 +6043,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
    const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
    AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
  
-  // Remember the AlignStack bit as operand 3.
-  AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
-                                            MVT::i1));
+  // Remember the HasSideEffect and AlignStack bits as operand 3.
+  unsigned ExtraInfo = 0;
+  if (IA->hasSideEffects())
+    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+  if (IA->isAlignStack())
+    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+                                                  TLI.getPointerTy()));
  
    // Loop over all of the inputs, copying the operand values into the
    // appropriate registers and processing the output regs.
@@ -5489,9 +6081,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
        // Copy the output from the appropriate register.  Find a register that
        // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty())
-        report_fatal_error("Couldn't allocate output reg for constraint '" +
-                           Twine(OpInfo.ConstraintCode) + "'!");
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        LLVMContext &Ctx = *DAG.getContext();
+        Ctx.emitError(CS.getInstruction(),  
+                      "couldn't allocate output register for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'");
+        break;
+      }
  
        // If this is an indirect operand, store through the pointer after the
        // asm.
@@ -5549,7 +6145,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                            " don't know how to handle tied "
                            "indirect register inputs");
            }
-          
+
            RegsForValue MatchedRegs;
            MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
            EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
@@ -5568,7 +6164,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                             DAG, AsmNodeOperands);
            break;
          }
-        
+
          assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
          assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
                 "Unexpected number of operands");
@@ -5582,16 +6178,22 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
          break;
        }
  
-      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
-        assert(!OpInfo.isIndirect &&
-               "Don't know how to handle indirect other inputs yet!");
+      // Treat indirect 'X' constraint as memory.
+      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+          OpInfo.isIndirect)
+        OpInfo.ConstraintType = TargetLowering::C_Memory;
  
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
          std::vector<SDValue> Ops;
-        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
                                           Ops, DAG);
-        if (Ops.empty())
-          report_fatal_error("Invalid operand for inline asm constraint '" +
-                             Twine(OpInfo.ConstraintCode) + "'!");
+        if (Ops.empty()) {
+          LLVMContext &Ctx = *DAG.getContext();
+          Ctx.emitError(CS.getInstruction(),
+                        "invalid operand for inline asm constraint '" +
+                        Twine(OpInfo.ConstraintCode) + "'");
+          break;
+        }
  
          // Add information to the INLINEASM node to know about this input.
          unsigned ResOpType =
@@ -5601,7 +6203,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
          AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
          break;
        }
-      
+
        if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
          assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
          assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
@@ -5622,10 +6224,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
               "Don't know how to handle indirect register inputs yet!");
  
        // Copy the input into the appropriate registers.
-      if (OpInfo.AssignedRegs.Regs.empty() ||
-          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
-        report_fatal_error("Couldn't allocate input reg for constraint '" +
-                           Twine(OpInfo.ConstraintCode) + "'!");
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        LLVMContext &Ctx = *DAG.getContext();
+        Ctx.emitError(CS.getInstruction(), 
+                      "couldn't allocate input reg for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'");
+        break;
+      }
  
        OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
                                          Chain, &Flag);
@@ -5638,8 +6243,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // Add the clobbered value to the operand list, so that the register
        // allocator is aware that the physreg got clobbered.
        if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(
-                                            InlineAsm::Kind_RegDefEarlyClobber,
+        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                   false, 0, DAG,
                                                   AsmNodeOperands);
        break;
@@ -5652,7 +6256,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
    if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
  
    Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
-                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      DAG.getVTList(MVT::Other, MVT::Glue),
                        &AsmNodeOperands[0], AsmNodeOperands.size());
    Flag = Chain.getValue(1);
  
@@ -5672,7 +6276,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // not have the same VT as was expected.  Convert it to the right type
        // with bit_convert.
        if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
-        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                            ResultType, Val);
  
        } else if (ResultType != Val.getValueType() &&
@@ -5710,7 +6314,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
                                 StoresToEmit[i].first,
                                 getValue(StoresToEmit[i].second),
-                               StoresToEmit[i].second, 0,
+                               MachinePointerInfo(StoresToEmit[i].second),
                                 false, false, 0);
      OutChains.push_back(Val);
    }
@@ -5730,9 +6334,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
  }
  
  void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+  const TargetData &TD = *TLI.getTargetData();
    SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                             getRoot(), getValue(I.getOperand(0)),
-                           DAG.getSrcValue(I.getOperand(0)));
+                           DAG.getSrcValue(I.getOperand(0)),
+                           TD.getABITypeAlignment(I.getType()));
    setValue(&I, V);
    DAG.setRoot(V.getValue(1));
  }
@@ -5758,23 +6364,24 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  /// FIXME: When all targets are
  /// migrated to using LowerCall, this hook should be integrated into SDISel.
  std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
                              bool RetSExt, bool RetZExt, bool isVarArg,
                              bool isInreg, unsigned NumFixedArgs,
                              CallingConv::ID CallConv, bool isTailCall,
-                            bool isReturnValueUsed,
+                            bool doesNotRet, bool isReturnValueUsed,
                              SDValue Callee,
                              ArgListTy &Args, SelectionDAG &DAG,
                              DebugLoc dl) const {
    // Handle all of the outgoing arguments.
    SmallVector<ISD::OutputArg, 32> Outs;
+  SmallVector<SDValue, 32> OutVals;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
        SDValue Op = SDValue(Args[i].Node.getNode(),
                             Args[i].Node.getResNo() + Value);
        ISD::ArgFlagsTy Flags;
@@ -5791,16 +6398,17 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
          Flags.setSRet();
        if (Args[i].isByVal) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should come from FE.  BE will guess if this
          // info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (Args[i].Alignment)
            FrameAlign = Args[i].Alignment;
+        else
+          FrameAlign = getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (Args[i].isNest)
          Flags.setNest();
@@ -5821,13 +6429,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
  
        for (unsigned j = 0; j != NumParts; ++j) {
          // if it isn't first piece, alignment must be 1
-        ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+                               i < NumFixedArgs);
          if (NumParts > 1 && j == 0)
            MyFlags.Flags.setSplit();
          else if (j != 0)
            MyFlags.Flags.setOrigAlign(1);
  
          Outs.push_back(MyFlags);
+        OutVals.push_back(Parts[j]);
        }
      }
    }
@@ -5842,7 +6452,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
      unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
+      MyFlags.VT = RegisterVT.getSimpleVT();
        MyFlags.Used = isReturnValueUsed;
        if (RetSExt)
          MyFlags.Flags.setSExt();
@@ -5855,8 +6465,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
    }
  
    SmallVector<SDValue, 4> InVals;
-  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
-                    Outs, Ins, dl, DAG, InVals);
+  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
+                    Outs, OutVals, Ins, dl, DAG, InVals);
  
    // Verify that the target's LowerCall behaved as expected.
    assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -5878,7 +6488,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
    DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
            assert(InVals[i].getNode() &&
                   "LowerCall emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
+          assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
                   "LowerCall emitted a value with the wrong type!");
          });
  
@@ -5904,7 +6514,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
  
    // For a function returning void, there is no return value. We can't create
    // such a node, so we just return a null return value in that case. In
-  // that case, nothing will actualy look at the value.
+  // that case, nothing will actually look at the value.
    if (ReturnValues.empty())
      return std::make_pair(SDValue(), Chain);
  
@@ -5924,7 +6534,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
  
  SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    llvm_unreachable("LowerOperation not implemented for this target!");
-  return SDValue();
  }
  
  void
@@ -5943,6 +6552,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  
  #include "llvm/CodeGen/SelectionDAGISel.h"
  
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true.  This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+  // With FastISel active, we may be splitting blocks, so force creation
+  // of virtual registers for all non-dead arguments.
+  if (FastISel)
+    return A->use_empty();
+
+  const BasicBlock *Entry = A->getParent()->begin();
+  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+       UI != E; ++UI) {
+    const User *U = *UI;
+    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+      return false;  // Use not in entry block.
+  }
+  return true;
+}
+
  void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
    // If this is the entry block, emit arguments.
    const Function &F = *LLVMBB->getParent();
@@ -5952,14 +6580,10 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
    SmallVector<ISD::InputArg, 16> Ins;
  
    // Check whether the function can return without sret-demotion.
-  SmallVector<EVT, 4> OutVTs;
-  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
-  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
-                OutVTs, OutsFlags, TLI);
-
-  FuncInfo->CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(),
-                                                F.isVarArg(),
-                                                OutVTs, OutsFlags, DAG);
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                Outs, TLI);
+
    if (!FuncInfo->CanLowerReturn) {
      // Put in an sret pointer parameter before all the other parameters.
      SmallVector<EVT, 1> ValueVTs;
@@ -5984,7 +6608,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
        ISD::ArgFlagsTy Flags;
        unsigned OriginalAlignment =
          TD->getABITypeAlignment(ArgTy);
@@ -5999,16 +6623,17 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
          Flags.setSRet();
        if (F.paramHasAttr(Idx, Attribute::ByVal)) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(I->getType());
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(I->getType());
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should be passed from FE.  BE will guess if
          // this info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (F.getParamAlignment(Idx))
            FrameAlign = F.getParamAlignment(Idx);
+        else
+          FrameAlign = TLI.getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (F.paramHasAttr(Idx, Attribute::Nest))
          Flags.setNest();
@@ -6043,7 +6668,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
        for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
          assert(InVals[i].getNode() &&
                 "LowerFormalArguments emitted a null value!");
-        assert(Ins[i].VT == InVals[i].getValueType() &&
+        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
                 "LowerFormalArguments emitted a value with the wrong type!");
        }
      });
@@ -6090,8 +6715,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      if (I->use_empty() && NumValues)
        SDB->setUnusedArgValue(I, InVals[i]);
  
-    for (unsigned Value = 0; Value != NumValues; ++Value) {
-      EVT VT = ValueVTs[Value];
+    for (unsigned Val = 0; Val != NumValues; ++Val) {
+      EVT VT = ValueVTs[Val];
        EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
        unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
  
@@ -6110,15 +6735,42 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
        i += NumParts;
      }
  
-    if (!I->use_empty()) {
-      SDValue Res;
-      if (!ArgValues.empty())
-        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
-                                 SDB->getCurDebugLoc());
-      SDB->setValue(I, Res);
+    // We don't need to do anything else for unused arguments.
+    if (ArgValues.empty())
+      continue;
+
+    // Note down frame index.
+    if (FrameIndexSDNode *FI =
+       dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+      FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+
+    SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                     SDB->getCurDebugLoc());
+
+    SDB->setValue(I, Res);
+    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      if (LoadSDNode *LNode = 
+          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+        if (FrameIndexSDNode *FI =
+            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+    }
  
-      // If this argument is live outside of the entry block, insert a copy from
-      // whereever we got it to the vreg that other BB's will reference it as.
+    // If this argument is live outside of the entry block, insert a copy from
+    // wherever we got it to the vreg that other BB's will reference it as.
+    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+      // If we can, though, try to skip creating an unnecessary vreg.
+      // FIXME: This isn't very clean... it would be nice to make this more
+      // general.  It's also subtly incompatible with the hacks FastISel
+      // uses with vregs.
+      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        FuncInfo->ValueMap[I] = Reg;
+        continue;
+      }
+    }
+    if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
+      FuncInfo->InitializeRegForValue(I);
        SDB->CopyToExportRegsIfNeeded(I);
      }
    }
@@ -6164,6 +6816,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
        // Ignore dead phi's.
        if (PN->use_empty()) continue;
  
+      // Skip empty types
+      if (PN->getType()->isEmptyTy())
+        continue;
+
        unsigned Reg;
        const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);