#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- CodeGenOpt::Level OptLevel;
// Libcall insertion helpers.
/// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
/// legalized. We use this to ensure that calls are properly serialized
/// against each other, including inserted libcalls.
SDValue LastCALLSEQ_END;
- enum LegalizeAction {
- Legal, // The target natively supports this operation.
- Promote, // This operation should be executed in a larger type.
- Expand // Try to expand this to other ops, otherwise use a libcall.
- };
-
- /// ValueTypeActions - This is a bitvector that contains two bits for each
- /// value type, where the two bits correspond to the LegalizeAction enum.
- /// This can be queried with "getTypeAction(VT)".
- TargetLowering::ValueTypeActionImpl ValueTypeActions;
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
DenseMap<SDValue, SDValue> LegalizedNodes;
// If someone requests legalization of the new node, return itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
- }
-
-public:
- SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
- /// getTypeAction - Return how we should legalize values of this type, either
- /// it is already legal or we need to expand it into multiple registers of
- /// smaller integer type, or we need to promote it to a larger type.
- LegalizeAction getTypeAction(EVT VT) const {
- return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
}
- /// isTypeLegal - Return true if this type is legal on this target.
- ///
- bool isTypeLegal(EVT VT) const {
- return getTypeAction(VT) == Legal;
- }
+public:
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
void LegalizeDAG();
private:
- /// LegalizeOp - We know that the specified value has a legal type.
- /// Recursively ensure that the operands have legal types, then return the
- /// result.
+ /// LegalizeOp - Return a legal replacement for the given operation, with
+ /// all legal operands.
SDValue LegalizeOp(SDValue O);
SDValue OptimizeFloatStore(StoreSDNode *ST);
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
- CodeGenOpt::Level ol)
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
: TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag), OptLevel(ol),
- ValueTypeActions(TLI.getValueTypeActions()) {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ DAG(dag) {
}
void SelectionDAGLegalize::LegalizeDAG() {
LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
- const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
VT = SVT;
Extend = true;
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
CPIdx, MachinePointerInfo::getConstantPool(),
VT, false, false, Alignment);
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(), Alignment);
- } else {
- // Do a (aligned) store to a stack slot, then copy from the stack slot
- // to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- EVT RegVT =
- TLI.getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
- // Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, MachinePointerInfo(),
- StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
- MachinePointerInfo(),
- false, false, 0);
- // Store it to the final location. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo().getWithOffset(Offset),
- ST->isVolatile(), ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // Increment the pointers.
- Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
+ }
+ // Do a (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
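+ // NumRegs rounds up, so the last, possibly partial, register is handled
+ // by the truncating store emitted after the copy loop.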
- // The last store may be partial. Do a truncating store. On big-endian
- // machines this requires an extending load from the stack slot to ensure
- // that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
- // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
- MachinePointerInfo(),
- MemVT, false, false, 0);
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo()
- .getWithOffset(Offset),
- MemVT, ST->isVolatile(),
- ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// The last copy may be partial. Do an extending load.
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset),
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
Stores.size());
// Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
MachinePointerInfo(), LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getPointerInfo(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr,
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getPointerInfo(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
// aggregate the two parts
- SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
DebugLoc dl = ST->getDebugLoc();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
- getTypeAction(MVT::i32) == Legal) {
+ TLI.isTypeLegal(MVT::i32)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
- if (getTypeAction(MVT::i64) == Legal) {
+ if (TLI.isTypeLegal(MVT::i64)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
- if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
}
}
}
- return SDValue();
+ return SDValue(0, 0);
}
-/// LegalizeOp - We know that the specified value has a legal type, and
-/// that its operands are legal. Now ensure that the operation itself
-/// is legal, recursively ensuring that the operands' operations remain
-/// legal.
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return Op;
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
- TargetLowering::LegalizeAction Action;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
switch (Node->getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::BR_CC: {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
- case ISD::TRAMPOLINE:
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
// These operations lie about being legal: when they claim to be legal,
case ISD::BRCOND:
// Branches tweak the chain to include LastCALLSEQ_END
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
+ LastCALLSEQ_END);
Ops[0] = LegalizeOp(Ops[0]);
LastCALLSEQ_END = DAG.getEntryNode();
break;
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[1]));
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[2]));
break;
}
}
break;
case ISD::CALLSEQ_START: {
- static int depth = 0;
SDNode *CallEnd = FindCallEndFromCallStart(Node);
// Recursively Legalize all of the inputs of the call end that do not lead
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Tmp1, LastCALLSEQ_END);
Tmp1 = LegalizeOp(Tmp1);
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
-
- SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
// Note that we are selecting this call!
LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
- depth++;
// Legalize the call, starting from the CALLSEQ_END.
LegalizeOp(LastCALLSEQ_END);
- depth--;
- assert(depth >= 0 && "Un-matched CALLSEQ_START?");
- if (depth > 0)
- LastCALLSEQ_END = Saved_LastCALLSEQ_END;
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
return Result;
}
case ISD::CALLSEQ_END:
Result.getResNo());
}
}
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
+ IsLegalizingCall = false;
+
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
if (Node->getNumValues() == 2)
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Tmp1, Tmp2, LD->getPointerInfo(),
NVT, isVolatile, isNonTemporal, Alignment);
if (TLI.isLittleEndian()) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Tmp1, Tmp2,
LD->getPointerInfo(), RoundVT, isVolatile,
isNonTemporal, Alignment);
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
LD->getPointerInfo(), RoundVT, isVolatile,
isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- Node->getValueType(0), dl, Tmp1, Tmp2,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
+ Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
}
break;
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ // Advance the pointer only after the load, so that element Idx is read
+ // from offset Idx * Stride, matching the pointer info above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
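+ // Build a wide vector of narrow parts and bitcast it back to the
+ // promoted vector type. Only one part in each group of SizeRatio
+ // elements is actually loaded; the others are undef padding, which is
+ // acceptable for an any-extending load.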
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ // Advance the pointer only after the load, matching the Idx * Stride
+ // offset in the pointer info.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ if (TLI.isBigEndian()) {
+ // MSB (which is garbage, comes first)
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+ // LSB (which is data, comes first)
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+
+ }
+
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
// whether they are legal or not, and then we end up here without any
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment);
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
case TargetLowering::Custom:
Result = TLI.LowerOperation(Result, DAG);
break;
- case Expand:
+ case TargetLowering::Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ if (StVT.isVector()) {
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Tmp3.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
+ bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
+
+ // We need to expand this store. If the register element type
+ // is legal then we can scalarize the vector and use
+ // truncating stores.
+ if (RegScalarLegal) {
+ // Size in bits of each element as it is stored in memory.
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it
+ // later.
+ SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Advance the pointer only after the store, so that element Idx is
+ // written at offset Idx*Stride, matching the pointer info.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ // The scalar register type is illegal.
+ // For example, saving <2 x i64> -> <2 x i32> on x86.
+ // Here we bitcast the value into a vector of smaller parts and
+ // save it using smaller scalars.
+ if (!RegScalarLegal && MemScalarLegal) {
+ // Store Stride in bytes
+ unsigned Stride = MemSclVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
+ MemSclVT,
+ SizeRatio * NumElem);
+
+ // Cast the wide elem vector to wider vec with smaller elem type.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
+ // Extract the Ith element.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ // Store this element only if it is:
+ // - the first element of its group on big endian, or
+ // - the last element of its group on little endian.
+ if ((TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
+ (!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+
+ // Bump the pointer after the (possible) store, so that the address
+ // matches the Idx*Stride offset above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ assert(false && "Unable to legalize the vector trunc store!");
+ } // is vector
+
+
// TRUNCSTORE:i16 i32 -> STORE i16
- assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
if (Op.getValueType().isVector())
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
false, false, 0);
}
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the whole vector to a temporary stack slot, overwrite the
+ // inserted part, then reload the updated vector.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
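+ // The index must be pointer-width before it can be added to the stack
+ // address.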
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ // Chain the part store after the whole-vector store, so the final load
+ // observes both writes.
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
+}
+
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// We can't handle this case efficiently. Allocate a sufficiently
// aligned object on the stack, store each element into it, then load
SDValue SignBit;
EVT FloatVT = Tmp2.getValueType();
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
- if (isTypeLegal(IVT)) {
+ if (TLI.isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
if (BitShift)
SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
}
}
// Now get the sign bit proper, by seeing whether the value is negative.
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
- const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
PtrInfo, SlotVT, false, false, DestAlign);
}
}
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
return CallInfo.first;
}
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ return CallInfo.first;
+}
+
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
return ExpandLibCall(LC, Node, isSigned);
}
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
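+ // Scan the other uses of the first operand: if some other node computes
+ // the complementary operation on the same two operands, a single divrem
+ // can feed both.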
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if (User->getOpcode() == OtherOpcode &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
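+ // Divrem libcalls (e.g. __divmodsi4 in libgcc/compiler-rt) return the
+ // quotient directly and write the remainder through an extra pointer
+ // argument.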
+ // Also pass the address of the stack slot that receives the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ DebugLoc dl = Node->getDebugLoc();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
+ MachinePointerInfo(), false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
if (!isSigned) {
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
- SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
SDValue AndConst = DAG.getConstant(1, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
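+ // For inputs with the sign bit set, halve the value with the low bit
+ // folded back in ((Op0 >> 1) | (Op0 & 1)), convert, then add the result
+ // to itself; keeping the low bit preserves the correct f32 rounding.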
}
// Otherwise, implement the fully general conversion.
- EVT SHVT = TLI.getShiftAmountTy();
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
DAG.getConstant(32, SHVT));
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, Alignment));
///
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT VT = Op.getValueType();
- EVT SHVT = TLI.getShiftAmountTy();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(0 && "Unhandled Expand type in BSWAP!");
default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned Len = VT.getSizeInBits();
assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
Op = DAG.getNode(ISD::SRL, dl, VT,
DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, ShVT));
-
+
return Op;
}
case ISD::CTLZ: {
//
// but see also: http://www.hackersdelight.org/HDcode/nlz.cc
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned len = VT.getSizeInBits();
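+ // Smear the highest set bit to the right (x |= x >> 1, x |= x >> 2, ...),
+ // then count the leading zeros as the CTPOP of the complement.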
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
+ case ISD::ATOMIC_FENCE:
case ISD::MEMBARRIER: {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
Results.push_back(CallResult.second);
break;
}
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
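+ // Comparing against zero and swapping in zero never changes memory; the
+ // returned original value is exactly the atomic load result.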
+ SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
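+ // The old value returned by the swap is dropped; only the chain result
+ // is used.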
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
// SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
- EVT ShiftAmountTy = TLI.getShiftAmountTy();
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
if (VT.isVector())
ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
}
case ISD::FP_ROUND_INREG: {
// The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targetting a temporary location (a stack slot).
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
// NOTE: there is a choice here between constantly creating new stack
// slots and always reusing the same one. We currently always create
case ISD::EXTRACT_SUBVECTOR:
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
case ISD::CONCAT_VECTORS: {
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (getTypeAction(EltVT) == Promote)
+ if (!TLI.isTypeLegal(EltVT))
EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
DAG.getConstant(OpTy.getSizeInBits()/2,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
} else {
// 0 -> Lo
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_PPCF128));
break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
break;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned) {
+ } else if (isSigned)
Tmp1 = ExpandIntLibCall(Node, true,
RTLIB::SREM_I8,
RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
- } else {
+ else
Tmp1 = ExpandIntLibCall(Node, false,
RTLIB::UREM_I8,
RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
- }
Results.push_back(Tmp1);
break;
}
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
case ISD::UMULO:
case ISD::SMULO: {
EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;
TopHalf = BottomHalf.getValue(1);
} else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() * 2))) {
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
// have a libcall big enough.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (WideVT == MVT::i16)
LC = RTLIB::MUL_I16;
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
- RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
-
- SDValue Ret = ExpandLibCall(LC, Node, isSigned);
- BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
- TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
- DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
- TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
+
+ // The high part of each operand is obtained by SRA'ing all but one of
+ // the bits of the low part, i.e. it is the sign-extension of the low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture and thus has to be split to
+ // two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
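+ // EXTRACT_ELEMENT splits the WideVT result into its low (index 0) and
+ // high (index 1) VT-sized halves.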
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
}
+
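+ // For a signed multiply, overflow occurred iff the top half is not the
+ // sign-extension of the bottom half.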
if (isSigned) {
- Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
ISD::SETNE);
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
DAG.getConstant(PairTy.getSizeInBits()/2,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(PairTy)));
Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
break;
}
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(), MemVT,
false, false, 0);
Addr = LD;
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
- DAG.getCondCode(ISD::SETNE), Tmp2,
- DAG.getConstant(0, Tmp2.getValueType()),
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
Node->getOperand(2));
}
Results.push_back(Tmp1);
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
- DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
Results.push_back(Tmp1);
break;
}
// SelectionDAG::Legalize - This is the entry point for the file.
//
-void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize() {
/// run - This is the main entry point to this class.
///
- SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+ SelectionDAGLegalize(*this).LegalizeDAG();
}
-