Perform bitconvert to proper type

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 32f247df9773a32d997dac40f7cf4f6fa7612e69..298a5a837831566bb7f8bdf9571e3bf24cd71b95 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22,7 +22,9 @@
  #include "llvm/GlobalAlias.h"
  #include "llvm/GlobalVariable.h"
  #include "llvm/Function.h"
+#include "llvm/Instructions.h"
  #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/VectorExtras.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -34,6 +36,7 @@
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/StringExtras.h"
@@ -48,8 +51,23 @@ DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
  static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
                         SDValue V2);
  
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+  switch (TM.getSubtarget<X86Subtarget>().TargetType) {
+  default: llvm_unreachable("unknown subtarget type");
+  case X86Subtarget::isDarwin:
+    return new TargetLoweringObjectFileMachO();
+  case X86Subtarget::isELF:
+    return new TargetLoweringObjectFileELF();
+  case X86Subtarget::isMingw:
+  case X86Subtarget::isCygwin:
+  case X86Subtarget::isWindows:
+    return new TargetLoweringObjectFileCOFF();
+  }
+  
+}
+
  X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
-  : TargetLowering(TM) {
+  : TargetLowering(TM, createTLOF(TM)) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -64,7 +82,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setShiftAmountType(MVT::i8);
    setBooleanContents(ZeroOrOneBooleanContent);
    setSchedulingPreference(SchedulingForRegPressure);
-  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
    setStackPointerRegisterToSaveRestore(X86StackPtr);
  
    if (Subtarget->isTargetDarwin()) {
@@ -639,6 +656,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
      setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
      setOperationAction(ISD::SELECT,             MVT::v1i64, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
    }
  
    if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1021,7 +1041,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                        SelectionDAG &DAG) const {
    if (usesGlobalOffsetTable())
      return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
-  if (!Subtarget->isPICStyleRIPRel())
+  if (!Subtarget->is64Bit())
      // This doesn't have DebugLoc associated with it, but is not really the
      // same as a Register.
      return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
@@ -1048,7 +1068,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
    SmallVector<CCValAssign, 16> RVLocs;
    unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
    bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext());
    CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
  
    // If this is the first return lowered for this function, add the regs to the
@@ -1174,7 +1194,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
    SmallVector<CCValAssign, 16> RVLocs;
    bool isVarArg = TheCall->isVarArg();
    bool Is64Bit = Subtarget->is64Bit();
-  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
    CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
  
    SmallVector<SDValue, 8> ResultVals;
@@ -1207,7 +1228,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                                     MVT::v2i64, InFlag).getValue(1);
          Val = Chain.getValue(0);
          Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                          Val, DAG.getConstant(0, MVT::i64));        
+                          Val, DAG.getConstant(0, MVT::i64));
        } else {
          Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                     MVT::i64, InFlag).getValue(1);
@@ -1259,7 +1280,7 @@ static bool CallIsStructReturn(CallSDNode *TheCall) {
    return TheCall->getArgFlags(0).isSRet();
  }
  
-/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// ArgsAreStructReturn - Determines whether a function uses struct
  /// return semantics.
  static bool ArgsAreStructReturn(SDValue Op) {
    unsigned NumArgs = Op.getNode()->getNumValues() - 1;
@@ -1269,9 +1290,8 @@ static bool ArgsAreStructReturn(SDValue Op) {
    return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
  }
  
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
-/// the callee to pop its own arguments. Callee pop is necessary to support tail
-/// calls.
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
  bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
    if (IsVarArg)
      return false;
@@ -1319,23 +1339,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  }
  
  
-/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
-/// in a register before calling.
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
-  return !IsTailCall && !Is64Bit &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
-/// CallRequiresFnAddressInReg - Check whether the call requires the function
-/// address to be loaded in a register.
-bool
-X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
-  return !Is64Bit && IsTailCall &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
  /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
  /// by "Src" to address "Dst" with size and alignment information specified by
  /// the specific parameter attribute. The copy will be passed as a byval
@@ -1400,11 +1403,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  
    // Assign locations to all of the incoming arguments.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
  
    SmallVector<SDValue, 8> ArgValues;
    unsigned LastVal = ~0U;
+  SDValue ArgValue;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
@@ -1426,27 +1430,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
          RC = X86::FR64RegisterClass;
        else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
          RC = X86::VR128RegisterClass;
-      else if (RegVT.isVector()) {
-        assert(RegVT.getSizeInBits() == 64);
-        if (!Is64Bit)
-          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
-        else {
-          // Darwin calling convention passes MMX values in either GPRs or
-          // XMMs in x86-64. Other targets pass them in memory.
-          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
-            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
-            RegVT = MVT::v2i64;
-          } else {
-            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
-            RegVT = MVT::i64;
-          }
-        }
-      } else {
-        assert(0 && "Unknown argument type!");
-      }
+      else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+        RC = X86::VR64RegisterClass;
+      else
+        llvm_unreachable("Unknown argument type!");
  
-      unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
  
        // If this is an 8 or 16-bit value, it is really passed promoted to 32
        // bits.  Insert an assert[sz]ext to capture this, then truncate to the
@@ -1457,33 +1447,34 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
+      else if (VA.getLocInfo() == CCValAssign::BCvt)
+        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
  
-      if (VA.getLocInfo() != CCValAssign::Full)
-        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
-      // Handle MMX values passed in GPRs.
-      if (Is64Bit && RegVT != VA.getLocVT()) {
-        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
-        else if (RC == X86::VR128RegisterClass) {
+      if (VA.isExtInLoc()) {
+        // Handle MMX values passed in XMM regs.
+        if (RegVT.isVector()) {
            ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                                   ArgValue, DAG.getConstant(0, MVT::i64));
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
-        }
+          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        } else
+          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        }
-
-      ArgValues.push_back(ArgValue);
      } else {
        assert(VA.isMemLoc());
-      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+      ArgValue = LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i);
      }
+
+    // If value is passed via pointer - do a load.
+    if (VA.getLocInfo() == CCValAssign::Indirect)
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Root, ArgValue, NULL, 0);
+
+    ArgValues.push_back(ArgValue);
    }
  
    // The x86-64 ABI for returning structs by value requires that we copy
    // the sret argument into %rax for the return. Save the argument into
    // a virtual register so that we can access it from the return points.
-  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
-    MachineFunction &MF = DAG.getMachineFunction();
+  if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
      unsigned Reg = FuncInfo->getSRetReturnReg();
      if (!Reg) {
@@ -1626,8 +1617,9 @@ X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                                      const CCValAssign &VA,
                                      SDValue Chain,
                                      SDValue Arg, ISD::ArgFlagsTy Flags) {
+  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
    DebugLoc dl = TheCall->getDebugLoc();
-  unsigned LocMemOffset = VA.getLocMemOffset();
+  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
    PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
    if (Flags.isByVal()) {
@@ -1695,7 +1687,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  
    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
  
    // Get a count of how many bytes are to be pushed on the stack.
@@ -1731,48 +1723,45 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
    // of tail call optimization arguments are handle later.
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
+    MVT RegVT = VA.getLocVT();
      SDValue Arg = TheCall->getArg(i);
      ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
      bool isByVal = Flags.isByVal();
  
      // Promote the value if needed.
      switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
        break;
      case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
        break;
      case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+        // Special case: passing MMX values in XMM registers.
+        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+      } else
+        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      break;
+    case CCValAssign::Indirect: {
+      // Store the argument.
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+                           PseudoSourceValue::getFixedStack(FI), 0);
+      Arg = SpillSlot;
        break;
      }
+    }
  
      if (VA.isRegLoc()) {
-      if (Is64Bit) {
-        MVT RegVT = VA.getLocVT();
-        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
-          switch (VA.getLocReg()) {
-          default:
-            break;
-          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
-          case X86::R8: {
-            // Special case: passing MMX values in GPR registers.
-            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
-            break;
-          }
-          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
-          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
-            // Special case: passing MMX values in XMM registers.
-            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
-            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
-            Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
-            break;
-          }
-          }
-      }
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        if (!IsTailCall || (IsTailCall && isByVal)) {
@@ -1802,30 +1791,34 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
        InFlag = Chain.getValue(1);
      }
  
-  // ELF / PIC requires GOT in the EBX register before function calls via PLT
-  // GOT pointer.
-  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
-    Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
-                             DAG.getNode(X86ISD::GlobalBaseReg,
-                                         DebugLoc::getUnknownLoc(),
-                                         getPointerTy()),
-                             InFlag);
-    InFlag = Chain.getValue(1);
-  }
-  // If we are tail calling and generating PIC/GOT style code load the address
-  // of the callee into ecx. The value in ecx is used as target of the tail
-  // jump. This is done to circumvent the ebx/callee-saved problem for tail
-  // calls on PIC/GOT architectures. Normally we would just put the address of
-  // GOT into ebx and then call target@PLT. But for tail callss ebx would be
-  // restored (since ebx is callee saved) before jumping to the target@PLT.
-  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
-    // Note: The actual moving to ecx is done further down.
-    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-    if (G && !G->getGlobal()->hasHiddenVisibility() &&
-        !G->getGlobal()->hasProtectedVisibility())
-      Callee =  LowerGlobalAddress(Callee, DAG);
-    else if (isa<ExternalSymbolSDNode>(Callee))
-      Callee = LowerExternalSymbol(Callee,DAG);
+  
+  if (Subtarget->isPICStyleGOT()) {
+    // ELF / PIC requires GOT in the EBX register before function calls via PLT
+    // GOT pointer.
+    if (!IsTailCall) {
+      Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+                               DAG.getNode(X86ISD::GlobalBaseReg,
+                                           DebugLoc::getUnknownLoc(),
+                                           getPointerTy()),
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    } else {
+      // If we are tail calling and generating PIC/GOT style code load the
+      // address of the callee into ECX. The value in ecx is used as target of
+      // the tail jump. This is done to circumvent the ebx/callee-saved problem
+      // for tail calls on PIC/GOT architectures. Normally we would just put the
+      // address of GOT into ebx and then call target@PLT. But for tail calls
+      // ebx would be restored (since ebx is callee saved) before jumping to the
+      // target@PLT.
+
+      // Note: The actual moving to ECX is done further down.
+      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+      if (G && !G->getGlobal()->hasHiddenVisibility() &&
+          !G->getGlobal()->hasProtectedVisibility())
+        Callee = LowerGlobalAddress(Callee, DAG);
+      else if (isa<ExternalSymbolSDNode>(Callee))
+        Callee = LowerExternalSymbol(Callee, DAG);
+    }
    }
  
    if (Is64Bit && isVarArg) {
@@ -1913,12 +1906,48 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      // We should use extra load for direct calls to dllimported functions in
      // non-JIT mode.
-    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
-                                        getTargetMachine(), true))
-      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
-                                          G->getOffset());
+    GlobalValue *GV = G->getGlobal();
+    if (!GV->hasDLLImportLinkage()) {
+      unsigned char OpFlags = 0;
+    
+      // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+      // external symbols most go through the PLT in PIC mode.  If the symbol
+      // has hidden or protected visibility, or if it is static or local, then
+      // we don't need to use the PLT - we can directly call it.
+      if (Subtarget->isTargetELF() &&
+          getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+          GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+        OpFlags = X86II::MO_PLT;
+      } else if (Subtarget->isPICStyleStubAny() &&
+               (GV->isDeclaration() || GV->isWeakForLinker()) &&
+               Subtarget->getDarwinVers() < 9) {
+        // PC-relative references to external symbols should go through $stub,
+        // unless we're building with the leopard linker or later, which
+        // automatically synthesizes these stubs.
+        OpFlags = X86II::MO_DARWIN_STUB;
+      }
+
+      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+                                          G->getOffset(), OpFlags);
+    }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+    unsigned char OpFlags = 0;
+
+    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
+    // symbols should go through the PLT.
+    if (Subtarget->isTargetELF() &&
+        getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      OpFlags = X86II::MO_PLT;
+    } else if (Subtarget->isPICStyleStubAny() &&
+             Subtarget->getDarwinVers() < 9) {
+      // PC-relative references to external symbols should go through $stub,
+      // unless we're building with the leopard linker or later, which
+      // automatically synthesizes these stubs.
+      OpFlags = X86II::MO_DARWIN_STUB;
+    }
+      
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+                                         OpFlags);
    } else if (IsTailCall) {
      unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
  
@@ -1927,7 +1956,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                               Callee,InFlag);
      Callee = DAG.getRegister(Opc, getPointerTy());
      // Add register as live out.
-    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+    MF.getRegInfo().addLiveOut(Opc);
    }
  
    // Returns a chain & a flag for retval copy to use.
@@ -1957,9 +1986,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                                    RegsToPass[i].second.getValueType()));
  
    // Add an implicit use GOT pointer in EBX.
-  if (!IsTailCall && !Is64Bit &&
-      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      Subtarget->isPICStyleGOT())
+  if (!IsTailCall && Subtarget->isPICStyleGOT())
      Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
  
    // Add an implicit use of AL for x86 vararg functions.
@@ -2072,22 +2099,11 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
      return false;
  
    if (CheckTailCallReturnConstraints(TheCall, Ret)) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    unsigned CallerCC = MF.getFunction()->getCallingConv();
-    unsigned CalleeCC= TheCall->getCallingConv();
-    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
-      SDValue Callee = TheCall->getCallee();
-      // On x86/32Bit PIC/GOT  tail calls are supported.
-      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
-          !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
-        return true;
-
-      // Can only do local tail calls (in same module, hidden or protected) on
-      // x86_64 PIC/GOT at the moment.
-      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-        return G->getGlobal()->hasHiddenVisibility()
-            || G->getGlobal()->hasProtectedVisibility();
-    }
+    unsigned CallerCC =
+      DAG.getMachineFunction().getFunction()->getCallingConv();
+    unsigned CalleeCC = TheCall->getCallingConv();
+    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC)
+      return true;
    }
  
    return false;
@@ -2156,7 +2172,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
      }
  
      switch (SetCCOpcode) {
-    default: assert(0 && "Invalid integer condition!");
+    default: llvm_unreachable("Invalid integer condition!");
      case ISD::SETEQ:  return X86::COND_E;
      case ISD::SETGT:  return X86::COND_G;
      case ISD::SETGE:  return X86::COND_GE;
@@ -2196,7 +2212,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
-  default: assert(0 && "Condcode should be pre-legalized away");
+  default: llvm_unreachable("Condcode should be pre-legalized away");
    case ISD::SETUEQ:
    case ISD::SETEQ:   return X86::COND_E;
    case ISD::SETOLT:              // flipped
@@ -2715,6 +2731,15 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
    return Mask;
  }
  
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
  /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
  /// their permute mask.
  static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
@@ -2821,15 +2846,6 @@ static bool isSplatVector(SDNode *N) {
    return true;
  }
  
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
-  return ((isa<ConstantSDNode>(Elt) &&
-           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
-          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
  /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
  /// to an zero vector. 
  /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -2843,13 +2859,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
        unsigned Opc = V2.getOpcode();
        if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
          continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
          return false;
      } else if (Idx >= 0) {
        unsigned Opc = V1.getOpcode();
        if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
          continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V1.getOperand(Idx)))
          return false;
      }
    }
@@ -3017,7 +3035,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
        continue;
      }
      SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
-    if (Elt.getNode() && isZeroNode(Elt))
+    if (Elt.getNode() && X86::isZeroNode(Elt))
        ++NumZeros;
      else
        break;
@@ -3190,7 +3208,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
      if (Elt.getOpcode() != ISD::Constant &&
          Elt.getOpcode() != ISD::ConstantFP)
        IsAllConstants = false;
-    if (isZeroNode(Elt))
+    if (X86::isZeroNode(Elt))
        NumZero++;
      else {
        NonZeros |= (1 << i);
@@ -3267,7 +3285,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  
      // Is it a vector logical left shift?
      if (NumElems == 2 && Idx == 1 &&
-        isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+        X86::isZeroNode(Op.getOperand(0)) &&
+        !X86::isZeroNode(Op.getOperand(1))) {
        unsigned NumBits = VT.getSizeInBits();
        return getVShift(true, VT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -4368,11 +4387,12 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
      // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
      //   combine either bitwise AND or insert of float 0.0 to set these bits.
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    // Create this as a scalar to vector..
+    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
-  } else if (EVT == MVT::i32) {
-    // InsertPS works with constant index.
-    if (isa<ConstantSDNode>(N2))
-      return Op;
+  } else if (EVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+    // PINSR* works with constant index.
+    return Op;
    }
    return SDValue();
  }
@@ -4414,6 +4434,9 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
                                     DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
                                                 Op.getOperand(0))));
  
+  if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
    SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
    MVT VT = MVT::v2i32;
    switch (Op.getValueType().getSimpleVT()) {
@@ -4441,15 +4464,14 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel() &&
-             getTargetMachine().getCodeModel() == CodeModel::Small)
-      WrapperKind = X86ISD::WrapperRIP;
-  }
+  
+  if (Subtarget->isPICStyleRIPRel() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small)
+    WrapperKind = X86ISD::WrapperRIP;
+  else if (Subtarget->isPICStyleGOT())
+    OpFlag = X86II::MO_GOTOFF;
+  else if (Subtarget->isPICStyleStubPIC())
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    
    SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
                                               CP->getAlignment(),
@@ -4474,14 +4496,14 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel())
-      WrapperKind = X86ISD::WrapperRIP;
-  }
+  
+  if (Subtarget->isPICStyleRIPRel() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small)
+    WrapperKind = X86ISD::WrapperRIP;
+  else if (Subtarget->isPICStyleGOT())
+    OpFlag = X86II::MO_GOTOFF;
+  else if (Subtarget->isPICStyleStubPIC())
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    
    SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
                                            OpFlag);
@@ -4507,14 +4529,13 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel())
-      WrapperKind = X86ISD::WrapperRIP;
-  }
+  if (Subtarget->isPICStyleRIPRel() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small)
+    WrapperKind = X86ISD::WrapperRIP;
+  else if (Subtarget->isPICStyleGOT())
+    OpFlag = X86II::MO_GOTOFF;
+  else if (Subtarget->isPICStyleStubPIC())
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    
    SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
    
@@ -4524,7 +4545,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
    
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
+      !Subtarget->is64Bit()) {
      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg,
                                       DebugLoc::getUnknownLoc(),
@@ -4539,31 +4560,16 @@ SDValue
  X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
                                        int64_t Offset,
                                        SelectionDAG &DAG) const {
-  bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  bool ExtraLoadRequired =
-    Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
-
    // Create the TargetGlobalAddress node, folding in the constant
    // offset if it is legal.
+  unsigned char OpFlags =
+    Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
    SDValue Result;
-  if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+  if (OpFlags == X86II::MO_NO_FLAG && isInt32(Offset)) {
+    // A direct static reference to a global.
      Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
      Offset = 0;
    } else {
-    unsigned char OpFlags = 0;
-    
-    if (Subtarget->isPICStyleRIPRel() &&
-        getTargetMachine().getRelocationModel() != Reloc::Static) {
-      if (ExtraLoadRequired)
-        OpFlags = X86II::MO_GOTPCREL;
-    } else if (Subtarget->isPICStyleGOT() &&
-               getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-      if (ExtraLoadRequired)
-        OpFlags = X86II::MO_GOT;
-      else
-        OpFlags = X86II::MO_GOTOFF;
-    }
-    
      Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
    }
    
@@ -4574,18 +4580,15 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
      Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
  
    // With PIC, the address is actually $g + Offset.
-  if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+  if (isGlobalRelativeToPICBase(OpFlags)) {
      Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
                           Result);
    }
  
-  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
-  // load the value at address GV, not the value of GV itself. This means that
-  // the GlobalAddress must be in the base or index register of the address, not
-  // the GV offset field. Platform check is inside GVRequiresExtraLoad() call
-  // The same applies for external symbols during PIC codegen
-  if (ExtraLoadRequired)
+  // For globals that require a load from a stub to get the address, emit the
+  // load.
+  if (isGlobalStubReference(OpFlags))
      Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
                           PseudoSourceValue::getGOT(), 0);
  
@@ -4724,7 +4727,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
                                 Subtarget->is64Bit());
    }
    
-  assert(0 && "Unreachable");
+  llvm_unreachable("Unreachable");
    return SDValue();
  }
  
@@ -4740,10 +4743,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
    SDValue ShOpLo = Op.getOperand(0);
    SDValue ShOpHi = Op.getOperand(1);
    SDValue ShAmt  = Op.getOperand(2);
-  SDValue Tmp1 = isSRA ?
-    DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
-                DAG.getConstant(VTBits - 1, MVT::i8)) :
-    DAG.getConstant(0, VT);
+  SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+                                     DAG.getConstant(VTBits - 1, MVT::i8))
+                       : DAG.getConstant(0, VT);
  
    SDValue Tmp2, Tmp3;
    if (Op.getOpcode() == ISD::SHL_PARTS) {
@@ -4755,9 +4757,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
    }
  
    SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
-                                  DAG.getConstant(VTBits, MVT::i8));
+                                DAG.getConstant(VTBits, MVT::i8));
    SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
-                               AndNode, DAG.getConstant(0, MVT::i8));
+                             AndNode, DAG.getConstant(0, MVT::i8));
  
    SDValue Hi, Lo;
    SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
@@ -4889,19 +4891,22 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
    */
  
    DebugLoc dl = Op.getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
  
    // Build some magic constants.
    std::vector<Constant*> CV0;
-  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
    Constant *C0 = ConstantVector::get(CV0);
    SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
  
    std::vector<Constant*> CV1;
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+  CV1.push_back(
+    ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+  CV1.push_back(
+    ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
    Constant *C1 = ConstantVector::get(CV1);
    SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
  
@@ -5047,7 +5052,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
    
    unsigned Opc;
    switch (DstTy.getSimpleVT()) {
-  default: assert(0 && "Invalid FP_TO_SINT to lower!");
+  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
@@ -5106,6 +5111,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5113,11 +5119,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
      EltVT = VT.getVectorElementType();
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+    Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+    Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
@@ -5132,6 +5138,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5142,11 +5149,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
    }
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+    Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+    Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
@@ -5169,6 +5176,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    DebugLoc dl = Op.getDebugLoc();
@@ -5192,13 +5200,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // First get the sign bit of second operand.
    std::vector<Constant*> CV;
    if (SrcVT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    }
    Constant *C = ConstantVector::get(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5221,13 +5229,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // Clear first operand sign bit.
    CV.clear();
    if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    }
    C = ConstantVector::get(CV);
    CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5470,7 +5478,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
          NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
          return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
        }
-      assert(0 && "Illegal FP comparison");
+      llvm_unreachable("Illegal FP comparison");
      }
      // Handle all other FP comparisons here.
      return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
@@ -5484,8 +5492,11 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  
    switch (VT.getSimpleVT()) {
    default: break;
+  case MVT::v8i8:
    case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+  case MVT::v4i16:
    case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+  case MVT::v2i32:
    case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
    case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
    }
@@ -6179,6 +6190,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
    }
+  // ptest intrinsics. The intrinsic these come from are designed to return
+  // an integer value, not just an instruction so lower it to the ptest
+  // pattern and a setcc for the result.
+  case Intrinsic::x86_sse41_ptestz:
+  case Intrinsic::x86_sse41_ptestc:
+  case Intrinsic::x86_sse41_ptestnzc:{
+    unsigned X86CC = 0;
+    switch (IntNo) {
+    default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+    case Intrinsic::x86_sse41_ptestz:
+      // ZF = 1
+      X86CC = X86::COND_E;
+      break;
+    case Intrinsic::x86_sse41_ptestc:
+      // CF = 1
+      X86CC = X86::COND_B;
+      break;
+    case Intrinsic::x86_sse41_ptestnzc: 
+      // ZF and CF = 0
+      X86CC = X86::COND_A;
+      break;
+    }
+       
+    SDValue LHS = Op.getOperand(1);
+    SDValue RHS = Op.getOperand(2);
+    SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+    SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+    SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+    return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+  }
  
    // Fix vector shift instructions where the last operand is a non-immediate
    // i32 value.
@@ -6256,7 +6297,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
        case Intrinsic::x86_mmx_psrai_d:
          NewIntNo = Intrinsic::x86_mmx_psra_d;
          break;
-      default: LLVM_UNREACHABLE("Impossible intrinsic");  // Can't reach here.
+      default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
        }
        break;
      }
@@ -6406,7 +6447,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
  
      switch (CC) {
      default:
-      assert(0 && "Unsupported calling convention");
+      llvm_unreachable("Unsupported calling convention");
      case CallingConv::C:
      case CallingConv::X86_StdCall: {
        // Pass 'nest' parameter in ECX.
@@ -6655,7 +6696,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
    DebugLoc dl = Op.getDebugLoc();
  
    switch (Op.getOpcode()) {
-  default: assert(0 && "Unknown ovf instruction!");
+  default: llvm_unreachable("Unknown ovf instruction!");
    case ISD::SADDO:
      // A subtract of one will be selected as a INC. Note that INC doesn't
      // set CF, so we can't do this for UADDO.
@@ -6777,7 +6818,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  ///
  SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
    switch (Op.getOpcode()) {
-  default: assert(0 && "Should not custom lower this!");
+  default: llvm_unreachable("Should not custom lower this!");
    case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
    case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
    case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
@@ -7027,6 +7068,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::INC:                return "X86ISD::INC";
    case X86ISD::DEC:                return "X86ISD::DEC";
    case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM";
+  case X86ISD::PTEST:              return "X86ISD::PTEST";
    }
  }
  
@@ -7041,12 +7083,16 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
      return false;
  
    if (AM.BaseGV) {
-    // We can only fold this if we don't need an extra load.
-    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+    unsigned GVFlags =
+      Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+    
+    // If a reference to this global requires an extra load, we can't fold it.
+    if (isGlobalStubReference(GVFlags))
        return false;
-    // If BaseGV requires a register, we cannot also have a BaseReg.
-    if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
-        AM.HasBaseReg)
+    
+    // If BaseGV requires a register for the PIC base, we cannot also have a
+    // BaseReg specified.
+    if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
        return false;
  
      // X86-64 only supports addr of globals in small code model.
@@ -7621,7 +7667,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      // Get the X86 opcode to use.
      unsigned Opc;
      switch (MI->getOpcode()) {
-    default: assert(0 && "illegal opcode!");
+    default: llvm_unreachable("illegal opcode!");
      case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
      case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
      case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
@@ -8360,7 +8406,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
    SDValue  ValOp = N->getOperand(0);
    switch (N->getOpcode()) {
    default:
-    assert(0 && "Unknown shift opcode!");
+    llvm_unreachable("Unknown shift opcode!");
      break;
    case ISD::SHL:
      if (VT == MVT::v2i64)
@@ -8656,6 +8702,100 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  //                           X86 Inline Assembly Support
  //===----------------------------------------------------------------------===//
  
+static bool LowerToBSwap(CallInst *CI) {
+  // FIXME: this should verify that we are targetting a 486 or better.  If not,
+  // we will turn this bswap into something that will be lowered to logical ops
+  // instead of emitting the bswap asm.  For now, we don't support 486 or lower
+  // so don't worry about this.
+  
+  // Verify this is a simple bswap.
+  if (CI->getNumOperands() != 2 ||
+      CI->getType() != CI->getOperand(1)->getType() ||
+      !CI->getType()->isInteger())
+    return false;
+  
+  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty || Ty->getBitWidth() % 16 != 0)
+    return false;
+  
+  // Okay, we can do this xform, do so now.
+  const Type *Tys[] = { Ty };
+  Module *M = CI->getParent()->getParent()->getParent();
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  
+  Value *Op = CI->getOperand(1);
+  Op = CallInst::Create(Int, Op, CI->getName(), CI);
+  
+  CI->replaceAllUsesWith(Op);
+  CI->eraseFromParent();
+  return true;
+}
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+  std::string AsmStr = IA->getAsmString();
+
+  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+  std::vector<std::string> AsmPieces;
+  SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?
+
+  switch (AsmPieces.size()) {
+  default: return false;
+  case 1:
+    AsmStr = AsmPieces[0];
+    AsmPieces.clear();
+    SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
+
+    // bswap $0
+    if (AsmPieces.size() == 2 &&
+        (AsmPieces[0] == "bswap" ||
+         AsmPieces[0] == "bswapq" ||
+         AsmPieces[0] == "bswapl") &&
+        (AsmPieces[1] == "$0" ||
+         AsmPieces[1] == "${0:q}")) {
+      // No need to check constraints, nothing other than the equivalent of
+      // "=r,0" would be valid here.
+      return LowerToBSwap(CI);
+    }
+    // rorw $$8, ${0:w}  -->  llvm.bswap.i16
+    if (CI->getType() == Type::Int16Ty &&
+        AsmPieces.size() == 3 &&
+        AsmPieces[0] == "rorw" &&
+        AsmPieces[1] == "$$8," &&
+        AsmPieces[2] == "${0:w}" &&
+        IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+      return LowerToBSwap(CI);
+    }
+    break;
+  case 3:
+    if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
+      std::vector<std::string> Words;
+      SplitString(AsmPieces[0], Words, " \t");
+      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+        Words.clear();
+        SplitString(AsmPieces[1], Words, " \t");
+        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+          Words.clear();
+          SplitString(AsmPieces[2], Words, " \t,");
+          if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+              Words[2] == "%edx") {
+            return LowerToBSwap(CI);
+          }
+        }
+      }
+    }
+    break;
+  }
+  return false;
+}
+
+
+
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
  X86TargetLowering::ConstraintType
@@ -8806,17 +8946,18 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
        // Otherwise, this isn't something we can handle, reject it.
        return;
      }
+    
+    GlobalValue *GV = GA->getGlobal();
      // If we require an extra load to get this address, as in PIC mode, we
      // can't accept it.
-    if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(),
-                                       getTargetMachine(), false))
+    if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+                                                        getTargetMachine())))
        return;
  
      if (hasMemory)
-      Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);
+      Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
      else
-      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
-                                      Offset);
+      Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
      Result = Op;
      break;
    }
@@ -8837,7 +8978,37 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
      // FIXME: not handling fp-stack yet!
      switch (Constraint[0]) {      // GCC X86 Constraint Letters
      default: break;  // Unknown constraint letter
-    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
+    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+      if (Subtarget->is64Bit()) {
+        if (VT == MVT::i32)
+          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
+                                       X86::ESI, X86::EDI, X86::R8D, X86::R9D,
+                                       X86::R10D,X86::R11D,X86::R12D,
+                                       X86::R13D,X86::R14D,X86::R15D,
+                                       X86::EBP, X86::ESP, 0);
+        else if (VT == MVT::i16)
+          return make_vector<unsigned>(X86::AX,  X86::DX,  X86::CX, X86::BX,
+                                       X86::SI,  X86::DI,  X86::R8W,X86::R9W,
+                                       X86::R10W,X86::R11W,X86::R12W,
+                                       X86::R13W,X86::R14W,X86::R15W,
+                                       X86::BP,  X86::SP, 0);
+        else if (VT == MVT::i8)
+          return make_vector<unsigned>(X86::AL,  X86::DL,  X86::CL, X86::BL,
+                                       X86::SIL, X86::DIL, X86::R8B,X86::R9B,
+                                       X86::R10B,X86::R11B,X86::R12B,
+                                       X86::R13B,X86::R14B,X86::R15B,
+                                       X86::BPL, X86::SPL, 0);
+
+        else if (VT == MVT::i64)
+          return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
+                                       X86::RSI, X86::RDI, X86::R8,  X86::R9,
+                                       X86::R10, X86::R11, X86::R12,
+                                       X86::R13, X86::R14, X86::R15,
+                                       X86::RBP, X86::RSP, 0);
+
+        break;
+      }
+      // 32-bit fallthrough 
      case 'Q':   // Q_REGS
        if (VT == MVT::i32)
          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
@@ -8927,7 +9098,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
      // 'A' means EAX + EDX.
      if (Constraint == "A") {
        Res.first = X86::EAX;
-      Res.second = X86::GRADRegisterClass;
+      Res.second = X86::GR32_ADRegisterClass;
      }
      return Res;
    }