#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <sstream>
using namespace llvm;
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
PromotedBitwiseVT.getSimpleVT());
}
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP supports any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
- case ARMISD::VLD2D: return "ARMISD::VLD2D";
- case ARMISD::VLD3D: return "ARMISD::VLD3D";
- case ARMISD::VLD4D: return "ARMISD::VLD4D";
- case ARMISD::VST2D: return "ARMISD::VST2D";
- case ARMISD::VST3D: return "ARMISD::VST3D";
- case ARMISD::VST4D: return "ARMISD::VST4D";
case ARMISD::VEXT: return "ARMISD::VEXT";
case ARMISD::VREV64: return "ARMISD::VREV64";
case ARMISD::VREV32: return "ARMISD::VREV32";
case ARMISD::VREV16: return "ARMISD::VREV16";
- case ARMISD::VZIP32: return "ARMISD::VZIP32";
- case ARMISD::VZIP16: return "ARMISD::VZIP16";
- case ARMISD::VZIP8: return "ARMISD::VZIP8";
- case ARMISD::VUZP32: return "ARMISD::VUZP32";
- case ARMISD::VUZP16: return "ARMISD::VUZP16";
- case ARMISD::VUZP8: return "ARMISD::VUZP8";
- case ARMISD::VTRN32: return "ARMISD::VTRN32";
- case ARMISD::VTRN16: return "ARMISD::VTRN16";
- case ARMISD::VTRN8: return "ARMISD::VTRN8";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
}
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}
//===----------------------------------------------------------------------===//
}
}
-/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
-/// returns true if the operands should be inverted to form the proper
-/// comparison.
-static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2) {
- bool Invert = false;
CondCode2 = ARMCC::AL;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
- case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
case ISD::SETNE:
case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
- return Invert;
}
//===----------------------------------------------------------------------===//
/// CCAssignFnForNode - Selects the correct CCAssignFn for a
/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool Return,
bool isVarArg) const {
switch (CC) {
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
isLocalARMFunc = !Subtarget->isThumb() && !isExt;
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
- ARMCP::CPStub, 4);
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- Sym, ARMPCLabelIndex,
- ARMCP::CPStub, 4);
+ Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG) {
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj, "tlsgd", true);
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, "tlsgd", true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
// initial exec model
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj, "gottpoff", true);
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, "gottpoff", true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
} else {
// local exec model
- ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
+ new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- CPAddr, NULL, 0);
+ CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ PseudoSourceValue::getGOT(), 0);
return Result;
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
}
}
-/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
-/// even in non-static mode.
-static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
- bool isDecl = GV->isDeclaration() || GV->hasAvailableExternallyLinkage();
- if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
- return false;
- return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
-}
-
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
SDValue CPAddr;
if (RelocM == Reloc::Static)
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
else {
- unsigned PCAdj = (RelocM != Reloc::PIC_)
- ? 0 : (Subtarget->isThumb() ? 4 : 8);
- ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
- : ARMCP::CPValue;
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
- Kind, PCAdj);
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
- if (IsIndirect)
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
return Result;
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
"_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj);
+ ARMPCLabelIndex, PCAdj);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
-static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
- unsigned Opcode) {
- SDNode *Node = Op.getNode();
- EVT VT = Node->getValueType(0);
- DebugLoc dl = Op.getDebugLoc();
-
- if (!VT.is64BitVector())
- return SDValue(); // unimplemented
-
- SDValue Ops[] = { Node->getOperand(0),
- Node->getOperand(2) };
- return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
-}
-
-static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
- unsigned Opcode, unsigned NumVecs) {
- SDNode *Node = Op.getNode();
- EVT VT = Node->getOperand(3).getValueType();
- DebugLoc dl = Op.getDebugLoc();
-
- if (!VT.is64BitVector())
- return SDValue(); // unimplemented
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(0));
- Ops.push_back(Node->getOperand(2));
- for (unsigned N = 0; N < NumVecs; ++N)
- Ops.push_back(Node->getOperand(N + 3));
- return DAG.getNode(Opcode, dl, MVT::Other, Ops.data(), Ops.size());
-}
-
-SDValue
-ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (IntNo) {
- case Intrinsic::arm_neon_vld2:
- return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
- case Intrinsic::arm_neon_vld3:
- return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
- case Intrinsic::arm_neon_vld4:
- return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
- case Intrinsic::arm_neon_vst2:
- return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
- case Intrinsic::arm_neon_vst3:
- return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
- case Intrinsic::arm_neon_vst4:
- return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
- default: return SDValue(); // Don't custom lower most intrinsics.
- }
-}
-
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
- // blah. horrible, horrible hack with the forced magic name.
- // really need to clean this up. It belongs in the target-independent
- // layer somehow that doesn't require the coupling with the asm
- // printer.
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
? 0 : (Subtarget->isThumb() ? 4 : 8);
- ARMCP::ARMCPKind Kind = ARMCP::CPValue;
- // Save off the LSDA name for the AsmPrinter to use when it's time
- // to emit the table
- std::string LSDAName = "L_lsda_";
- LSDAName += MF.getFunction()->getName();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(*DAG.getContext(), LSDAName.c_str(),
- ARMPCLabelIndex, Kind, PCAdj);
+ new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
}
ARMCC::CondCodes CondCode, CondCode2;
- if (FPCCToARMCC(CC, CondCode, CondCode2))
- std::swap(TrueVal, FalseVal);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
ARMCC::CondCodes CondCode, CondCode2;
- if (FPCCToARMCC(CC, CondCode, CondCode2))
- // Swap the LHS/RHS of the comparison if needed.
- std::swap(LHS, RHS);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
// will be implemented with the NEON VNEG instruction. However, VNEG does
// not support i64 elements, so sometimes the zero vectors will need to be
// explicitly constructed. For those cases, and potentially other uses in
- // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- if (VT.getSizeInBits() == 64)
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
+ SmallVector<SDValue, 8> Ops;
+ MVT TVT;
+
+ if (VT.getSizeInBits() == 64) {
+ Ops.assign(8, Cst); TVT = MVT::v8i8;
+ } else {
+ Ops.assign(16, Cst); TVT = MVT::v16i8;
+ }
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
+ // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
+ // dest type. This ensures they get CSE'd.
SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
- if (VT.getSizeInBits() == 64)
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
+ SmallVector<SDValue, 8> Ops;
+ MVT TVT;
+
+ if (VT.getSizeInBits() == 64) {
+ Ops.assign(8, Cst); TVT = MVT::v8i8;
+ } else {
+ Ops.assign(16, Cst); TVT = MVT::v16i8;
+ }
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
N->getOperand(0), NegatedCount);
}
- assert(VT == MVT::i64 &&
- (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
return true;
}
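+/// isVTRNMask - Check if a shuffle mask selects one result of a VTRN
+/// (vector transpose) operation; e.g., for v4i32 the two VTRN results
+/// correspond to the masks <0, 4, 2, 6> and <1, 5, 3, 7>.  WhichResult
+/// is set to 0 or 1 to indicate which of the two results is matched.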
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((unsigned) M[i] != i + WhichResult ||
+ (unsigned) M[i+1] != i + NumElts + WhichResult)
+ return false;
+ }
+ return true;
+}
+
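+/// isVUZPMask - Check if a shuffle mask selects one result of a VUZP
+/// (vector unzip) operation; e.g., for v4i32 the two VUZP results
+/// correspond to the masks <0, 2, 4, 6> and <1, 3, 5, 7>.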
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+ return false;
+
+ return true;
+}
+
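+/// isVZIPMask - Check if a shuffle mask selects one result of a VZIP
+/// (vector zip, i.e., interleave) operation; e.g., for v4i32 the two VZIP
+/// results correspond to the masks <0, 4, 1, 5> and <2, 6, 3, 7>.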
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((unsigned) M[i] != Idx ||
+ (unsigned) M[i+1] != Idx + NumElts)
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+ return false;
+
+ return true;
+}
+
static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Canonicalize all-zeros and all-ones vectors.
ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
- if (Val.getNode())
- return BuildSplat(Val, VT, DAG, dl);
+ if (SplatBitSize <= 64) {
+ SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ if (Val.getNode())
+ return BuildSplat(Val, VT, DAG, dl);
+ }
}
// If there are only 2 elements in a 128-bit vector, insert them into an
}
bool ReverseVEXT;
- unsigned Imm;
+ unsigned Imm, WhichResult;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
- isVEXTMask(M, VT, ReverseVEXT, Imm));
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
case OP_VDUP2:
case OP_VDUP3:
return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
- OpLHS, DAG.getConstant(OpNum-OP_VDUP0+1, MVT::i32));
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3:
DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
case OP_VUZPL:
case OP_VUZPR:
- return DAG.getNode(VT.is64BitVector() ? ARMISD::VUZP16 : ARMISD::VUZP32,
- dl, DAG.getVTList(VT, VT),
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
case OP_VZIPL:
case OP_VZIPR:
- return DAG.getNode(VT.is64BitVector() ? ARMISD::VZIP16 : ARMISD::VZIP32,
- dl, DAG.getVTList(VT, VT),
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
case OP_VTRNL:
case OP_VTRNR:
- return DAG.getNode(VT.is64BitVector() ? ARMISD::VTRN16 : ARMISD::VTRN32,
- dl, DAG.getVTList(VT, VT),
- OpLHS, OpRHS).getValue(0);
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
}
}
bool ReverseVEXT;
unsigned Imm;
if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
- SDValue Op0 = SVN->getOperand(0);
- SDValue Op1 = SVN->getOperand(1);
if (ReverseVEXT)
- std::swap(Op0, Op1);
- return DAG.getNode(ARMISD::VEXT, dl, VT, Op0, Op1,
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
DAG.getConstant(Imm, MVT::i32));
}
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
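+ // For example, the two v4i32 shuffles <0, 4, 2, 6> and <1, 5, 3, 7> of
+ // the same operands both map to a single VTRN node, with the two results
+ // selected via getValue(0) and getValue(1), respectively.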
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
return SDValue();
}
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- return Op;
-}
-
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- assert((VT == MVT::i8 || VT == MVT::i16) &&
- "unexpected type for custom-lowering vector extract");
SDValue Vec = Op.getOperand(0);
SDValue Lane = Op.getOperand(1);
- Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
- Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+ assert(VT == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
+ "unexpected type for custom-lowering vector extract");
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
case ISD::SHL:
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
- while(!BB->succ_empty())
+ while (!BB->succ_empty())
BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
Ops, DAG);
}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}