Fix incorrect assert that should be a user error for code like 'mov $0, %%eax'.

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 3600987f26a624ba6d80f3213bd939666f58c2dd..11cc678944056423f5fd8fbf7f991c4b91942a7f 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
  #include "X86InstrBuilder.h"
  #include "X86ISelLowering.h"
  #include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
  #include "llvm/CallingConv.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
@@ -36,7 +37,6 @@
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/StringExtras.h"
@@ -47,6 +47,14 @@ using namespace llvm;
  static cl::opt<bool>
  DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
  
+// Disable16Bit - 16-bit operations typically have a larger encoding than
+// corresponding 32-bit instructions, and 16-bit code is slow on some
+// processors. This is an experimental flag to disable 16-bit operations
+// (which forces them to be Legalized to 32-bit operations).
+static cl::opt<bool>
+Disable16Bit("disable-16bit", cl::Hidden,
+             cl::desc("Disable use of 16-bit instructions"));
+
  // Forward declarations.
  static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                         SDValue V2);
@@ -55,6 +63,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
    switch (TM.getSubtarget<X86Subtarget>().TargetType) {
    default: llvm_unreachable("unknown subtarget type");
    case X86Subtarget::isDarwin:
+    if (TM.getSubtarget<X86Subtarget>().is64Bit())
+      return new X8664_MachoTargetObjectFile();
      return new TargetLoweringObjectFileMachO();
    case X86Subtarget::isELF:
      return new TargetLoweringObjectFileELF();
@@ -99,7 +109,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  
    // Set up the register classes.
    addRegisterClass(MVT::i8, X86::GR8RegisterClass);
-  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+  if (!Disable16Bit)
+    addRegisterClass(MVT::i16, X86::GR16RegisterClass);
    addRegisterClass(MVT::i32, X86::GR32RegisterClass);
    if (Subtarget->is64Bit())
      addRegisterClass(MVT::i64, X86::GR64RegisterClass);
@@ -108,9 +119,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  
    // We don't accept any truncstore of integer registers.
    setTruncStoreAction(MVT::i64, MVT::i32, Expand);
-  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+  if (!Disable16Bit)
+    setTruncStoreAction(MVT::i64, MVT::i16, Expand);
    setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
-  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+  if (!Disable16Bit)
+    setTruncStoreAction(MVT::i32, MVT::i16, Expand);
    setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
    setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
  
@@ -261,8 +274,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
    setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
    setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
-  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
-  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
+  if (Disable16Bit) {
+    setOperationAction(ISD::CTTZ           , MVT::i16  , Expand);
+    setOperationAction(ISD::CTLZ           , MVT::i16  , Expand);
+  } else {
+    setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
+    setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
+  }
    setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
    setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
    setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
@@ -279,13 +297,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
    // X86 wants to expand cmov itself.
    setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
-  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
+  if (Disable16Bit)
+    setOperationAction(ISD::SELECT        , MVT::i16  , Expand);
+  else
+    setOperationAction(ISD::SELECT        , MVT::i16  , Custom);
    setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
    setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
    setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
    setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
    setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
-  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
+  if (Disable16Bit)
+    setOperationAction(ISD::SETCC         , MVT::i16  , Expand);
+  else
+    setOperationAction(ISD::SETCC         , MVT::i16  , Custom);
    setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
    setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
    setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
@@ -1055,49 +1079,6 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
    return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
  }
  
-/// getPreferredLSDADataFormat - Return the preferred exception handling data
-/// format for the LSDA.
-unsigned X86TargetLowering::getPreferredLSDADataFormat() const {
-  if (Subtarget->isTargetDarwin())
-    return dwarf::DW_EH_PE_pcrel;
-
-  CodeModel::Model M = getTargetMachine().getCodeModel();
-
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (!Subtarget->is64Bit() || M == CodeModel::Small)
-      return dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
-    return dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
-  }
-
-  if (M == CodeModel::Small)
-    return dwarf::DW_EH_PE_sdata4;
-
-  return dwarf::DW_EH_PE_absptr;
-}
-
-/// getPreferredFDEDataFormat - Return the preferred exception handling data
-/// format for the FDE.
-unsigned X86TargetLowering::getPreferredFDEDataFormat() const {
-  if (Subtarget->isTargetDarwin())
-    return dwarf::DW_EH_PE_pcrel;
-
-  CodeModel::Model M = getTargetMachine().getCodeModel();
-
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (!Subtarget->is64Bit() ||
-        M == CodeModel::Small || M == CodeModel::Medium)
-      return dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
-    return dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
-  }
-
-  if (M == CodeModel::Small || M == CodeModel::Medium)
-    return dwarf::DW_EH_PE_sdata4;
-
-  return dwarf::DW_EH_PE_absptr;
-}
-
  //===----------------------------------------------------------------------===//
  //               Return Value Calling Convention Implementation
  //===----------------------------------------------------------------------===//
@@ -1106,7 +1087,7 @@ unsigned X86TargetLowering::getPreferredFDEDataFormat() const {
  
  SDValue
  X86TargetLowering::LowerReturn(SDValue Chain,
-                               unsigned CallConv, bool isVarArg,
+                               CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 DebugLoc dl, SelectionDAG &DAG) {
  
@@ -1128,7 +1109,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
    SmallVector<SDValue, 6> RetOps;
    RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
    // Operand #1 = Bytes To Pop
-  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+  RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
  
    // Copy the result values into the output registers.
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1198,7 +1179,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
  ///
  SDValue
  X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
-                                   unsigned CallConv, bool isVarArg,
+                                   CallingConv::ID CallConv, bool isVarArg,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) {
@@ -1298,7 +1279,7 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
  
  /// IsCalleePop - Determines whether the callee is required to pop its
  /// own arguments. Callee pop is necessary to support tail calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
    if (IsVarArg)
      return false;
  
@@ -1316,7 +1297,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
  
  /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
  /// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
    if (Subtarget->is64Bit()) {
      if (Subtarget->isTargetWin64())
        return CC_X86_Win64_C;
@@ -1335,7 +1316,7 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
  /// NameDecorationForCallConv - Selects the appropriate decoration to
  /// apply to a MachineFunction containing a given calling convention.
  NameDecorationStyle
-X86TargetLowering::NameDecorationForCallConv(unsigned CallConv) {
+X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
    if (CallConv == CallingConv::X86_FastCall)
      return FastCall;
    else if (CallConv == CallingConv::X86_StdCall)
@@ -1359,7 +1340,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
  
  SDValue
  X86TargetLowering::LowerMemArgument(SDValue Chain,
-                                    unsigned CallConv,
+                                    CallingConv::ID CallConv,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc dl, SelectionDAG &DAG,
                                      const CCValAssign &VA,
@@ -1394,7 +1375,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
  
  SDValue
  X86TargetLowering::LowerFormalArguments(SDValue Chain,
-                                        unsigned CallConv,
+                                        CallingConv::ID CallConv,
                                          bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
                                          DebugLoc dl,
@@ -1695,7 +1676,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
  
  SDValue
  X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
-                             unsigned CallConv, bool isVarArg, bool isTailCall,
+                             CallingConv::ID CallConv, bool isVarArg,
+                             bool isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
@@ -2140,12 +2122,12 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
  /// optimization should implement this function.
  bool
  X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
-                                                     unsigned CalleeCC,
+                                                     CallingConv::ID CalleeCC,
                                                       bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
                                                       SelectionDAG& DAG) const {
    MachineFunction &MF = DAG.getMachineFunction();
-  unsigned CallerCC = MF.getFunction()->getCallingConv();
+  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
    return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
  }
  
@@ -5218,11 +5200,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
    DebugLoc dl = Op.getDebugLoc();
    EVT VT = Op.getValueType();
    EVT EltVT = VT;
-  unsigned EltNum = 1;
-  if (VT.isVector()) {
+  if (VT.isVector())
      EltVT = VT.getVectorElementType();
-    EltNum = VT.getVectorNumElements();
-  }
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
      Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
@@ -6379,9 +6358,23 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
        break;
      }
      }
+
+    // The vector shift intrinsics with scalars uses 32b shift amounts but
+    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
+    // to be zero.
+    SDValue ShOps[4];
+    ShOps[0] = ShAmt;
+    ShOps[1] = DAG.getConstant(0, MVT::i32);
+    if (ShAmtVT == MVT::v4i32) {
+      ShOps[2] = DAG.getUNDEF(MVT::i32);
+      ShOps[3] = DAG.getUNDEF(MVT::i32);
+      ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+    } else {
+      ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+    }
+
      EVT VT = Op.getValueType();
-    ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+    ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                         DAG.getConstant(NewIntNo, MVT::i32),
                         Op.getOperand(1), ShAmt);
@@ -6519,7 +6512,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
    } else {
      const Function *Func =
        cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
-    unsigned CC = Func->getCallingConv();
+    CallingConv::ID CC = Func->getCallingConv();
      unsigned NestReg;
  
      switch (CC) {
@@ -7745,11 +7738,67 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
    return EndMBB;
  }
  
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+                                     MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern.  The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It;
+  
+  //  thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   cmpTY ccX, r1, r2
+  //   bCC copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  unsigned Opc =
+    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+  BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the new block which will contain the Phi node for the select.
+  sinkMBB->transferSuccessors(BB);
+  
+  // Add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
+  
+  //  copy0MBB:
+  //   %FalseValue = ...
+  //   # fallthrough to sinkMBB
+  BB = copy0MBB;
+  
+  // Update machine-CFG edges
+  BB->addSuccessor(sinkMBB);
+  
+  //  sinkMBB:
+  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  //  ...
+  BB = sinkMBB;
+  BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  return BB;
+}
+
+
  MachineBasicBlock *
  X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
-  DebugLoc dl = MI->getDebugLoc();
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    switch (MI->getOpcode()) {
    default: assert(false && "Unexpected instr type to insert");
    case X86::CMOV_GR8:
@@ -7758,57 +7807,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    case X86::CMOV_FR64:
    case X86::CMOV_V4F32:
    case X86::CMOV_V2F64:
-  case X86::CMOV_V2I64: {
-    // To "insert" a SELECT_CC instruction, we actually have to insert the
-    // diamond control-flow pattern.  The incoming instruction knows the
-    // destination vreg to set, the condition code register to branch on, the
-    // true/false values to select between, and a branch opcode to use.
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
-
-    //  thisMBB:
-    //  ...
-    //   TrueVal = ...
-    //   cmpTY ccX, r1, r2
-    //   bCC copy1MBB
-    //   fallthrough --> copy0MBB
-    MachineBasicBlock *thisMBB = BB;
-    MachineFunction *F = BB->getParent();
-    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    unsigned Opc =
-      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
-    BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
-    F->insert(It, copy0MBB);
-    F->insert(It, sinkMBB);
-    // Update machine-CFG edges by transferring all successors of the current
-    // block to the new block which will contain the Phi node for the select.
-    sinkMBB->transferSuccessors(BB);
-
-    // Add the true and fallthrough blocks as its successors.
-    BB->addSuccessor(copy0MBB);
-    BB->addSuccessor(sinkMBB);
-
-    //  copy0MBB:
-    //   %FalseValue = ...
-    //   # fallthrough to sinkMBB
-    BB = copy0MBB;
-
-    // Update machine-CFG edges
-    BB->addSuccessor(sinkMBB);
-
-    //  sinkMBB:
-    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-    //  ...
-    BB = sinkMBB;
-    BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
-      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
-
-    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
-    return BB;
-  }
+  case X86::CMOV_V2I64:
+    return EmitLoweredSelect(MI, BB);
  
    case X86::FP32_TO_INT16_IN_MEM:
    case X86::FP32_TO_INT32_IN_MEM:
@@ -7819,27 +7819,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    case X86::FP80_TO_INT16_IN_MEM:
    case X86::FP80_TO_INT32_IN_MEM:
    case X86::FP80_TO_INT64_IN_MEM: {
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    DebugLoc DL = MI->getDebugLoc();
+
      // Change the floating point control register to use "round towards zero"
      // mode when truncating to an integer value.
      MachineFunction *F = BB->getParent();
      int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
  
      // Load the old value of the high byte of the control word...
      unsigned OldCW =
        F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW),
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
                        CWFrameIdx);
  
      // Set the high part to be round to zero...
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx)
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
        .addImm(0xC7F);
  
      // Reload the modified control word now...
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
  
      // Restore the memory image of control word to original value
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx)
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
        .addReg(OldCW);
  
      // Get the X86 opcode to use.
@@ -7878,11 +7881,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      } else {
        AM.Disp = Op.getImm();
      }
-    addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM)
+    addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
                        .addReg(MI->getOperand(X86AddrNumOperands).getReg());
  
      // Reload the original control word now.
-    addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+    addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
  
      F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
      return BB;
@@ -8246,8 +8249,18 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
-      case ISD::SETOGT: // (X > Y) ? Y : X -> min
-      case ISD::SETUGT:
+      case ISD::SETOGT:
+        // This can use a min only if the LHS isn't NaN.
+        if (DAG.isKnownNeverNaN(LHS))
+          Opcode = X86ISD::FMIN;
+        else if (DAG.isKnownNeverNaN(RHS)) {
+          Opcode = X86ISD::FMIN;
+          // Put the potential NaN in the RHS so that SSE will preserve it.
+          std::swap(LHS, RHS);
+        }
+        break;
+
+      case ISD::SETUGT: // (X > Y) ? Y : X -> min
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
@@ -8256,8 +8269,18 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
          Opcode = X86ISD::FMIN;
          break;
  
-      case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
+        // This can use a max only if the LHS isn't NaN.
+        if (DAG.isKnownNeverNaN(LHS))
+          Opcode = X86ISD::FMAX;
+        else if (DAG.isKnownNeverNaN(RHS)) {
+          Opcode = X86ISD::FMAX;
+          // Put the potential NaN in the RHS so that SSE will preserve it.
+          std::swap(LHS, RHS);
+        }
+        break;
+
+      case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
@@ -8564,7 +8587,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
    SDValue ShAmtOp = N->getOperand(1);
    EVT EltVT = VT.getVectorElementType();
    DebugLoc DL = N->getDebugLoc();
-  SDValue BaseShAmt;
+  SDValue BaseShAmt = SDValue();
    if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
      unsigned NumElts = VT.getVectorNumElements();
      unsigned i = 0;
@@ -8583,15 +8606,34 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
      }
    } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
               cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
-    BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
-                            DAG.getIntPtrConstant(0));
+    SDValue InVec = ShAmtOp.getOperand(0);
+    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElts = InVec.getValueType().getVectorNumElements();
+      unsigned i = 0;
+      for (; i != NumElts; ++i) {
+        SDValue Arg = InVec.getOperand(i);
+        if (Arg.getOpcode() == ISD::UNDEF) continue;
+        BaseShAmt = Arg;
+        break;
+      }
+    } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+         unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+         if (C->getZExtValue() == SplatIdx)
+           BaseShAmt = InVec.getOperand(1);
+       }
+    }
+    if (BaseShAmt.getNode() == 0)
+      BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+                              DAG.getIntPtrConstant(0));
    } else
      return SDValue();
  
+  // The shift amount is an i32.
    if (EltVT.bitsGT(MVT::i32))
      BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
    else if (EltVT.bitsLT(MVT::i32))
-    BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);
+    BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
  
    // The shift amount is identical so we can do a vector shift.
    SDValue  ValOp = N->getOperand(0);
@@ -9284,15 +9326,39 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  
    // Not found as a standard register?
    if (Res.second == 0) {
-    // GCC calls "st(0)" just plain "st".
+    // Map st(0) -> st(7) -> ST0
+    if (Constraint.size() == 7 && Constraint[0] == '{' &&
+        tolower(Constraint[1]) == 's' &&
+        tolower(Constraint[2]) == 't' &&
+        Constraint[3] == '(' &&
+        (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+        Constraint[5] == ')' &&
+        Constraint[6] == '}') {
+      
+      Res.first = X86::ST0+Constraint[4]-'0';
+      Res.second = X86::RFP80RegisterClass;
+      return Res;
+    }
+    
+    // GCC allows "st(0)" to be called just plain "st".
      if (StringsEqualNoCase("{st}", Constraint)) {
        Res.first = X86::ST0;
        Res.second = X86::RFP80RegisterClass;
+      return Res;
      }
+
+    // flags -> EFLAGS
+    if (StringsEqualNoCase("{flags}", Constraint)) {
+      Res.first = X86::EFLAGS;
+      Res.second = X86::CCRRegisterClass;
+      return Res;
+    }
+    
      // 'A' means EAX + EDX.
      if (Constraint == "A") {
        Res.first = X86::EAX;
        Res.second = X86::GR32_ADRegisterClass;
+      return Res;
      }
      return Res;
    }