Revert the ConstantInt constructors back to their 2.5 forms where possible, thanks...

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 2c6a727f08f1cef51ccdb3737e85be6faac1b904..15c6dfead43c5bb42e06d84e8e7ca635a1330f9f 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22,7 +22,9 @@
  #include "llvm/GlobalAlias.h"
  #include "llvm/GlobalVariable.h"
  #include "llvm/Function.h"
+#include "llvm/Instructions.h"
  #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/VectorExtras.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -64,7 +66,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setShiftAmountType(MVT::i8);
    setBooleanContents(ZeroOrOneBooleanContent);
    setSchedulingPreference(SchedulingForRegPressure);
-  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
    setStackPointerRegisterToSaveRestore(X86StackPtr);
  
    if (Subtarget->isTargetDarwin()) {
@@ -639,6 +640,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
      setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
      setOperationAction(ISD::SELECT,             MVT::v1i64, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
    }
  
    if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1048,7 +1052,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
    SmallVector<CCValAssign, 16> RVLocs;
    unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
    bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext());
    CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
  
    // If this is the first return lowered for this function, add the regs to the
@@ -1175,7 +1179,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
    bool isVarArg = TheCall->isVarArg();
    bool Is64Bit = Subtarget->is64Bit();
    CCState CCInfo(CallingConv, isVarArg, getTargetMachine(),
-                 RVLocs, DAG.getContext());
+                 RVLocs, *DAG.getContext());
    CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
  
    SmallVector<SDValue, 8> ResultVals;
@@ -1384,7 +1388,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  
    // Assign locations to all of the incoming arguments.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
  
    SmallVector<SDValue, 8> ArgValues;
@@ -1426,7 +1430,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
            }
          }
        } else {
-        assert(0 && "Unknown argument type!");
+        llvm_unreachable("Unknown argument type!");
        }
  
        unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
@@ -1679,7 +1683,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  
    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
  
    // Get a count of how many bytes are to be pushed on the stack.
@@ -1721,7 +1725,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  
      // Promote the value if needed.
      switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
@@ -2167,7 +2171,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
      }
  
      switch (SetCCOpcode) {
-    default: assert(0 && "Invalid integer condition!");
+    default: llvm_unreachable("Invalid integer condition!");
      case ISD::SETEQ:  return X86::COND_E;
      case ISD::SETGT:  return X86::COND_G;
      case ISD::SETGE:  return X86::COND_GE;
@@ -2207,7 +2211,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
-  default: assert(0 && "Condcode should be pre-legalized away");
+  default: llvm_unreachable("Condcode should be pre-legalized away");
    case ISD::SETUEQ:
    case ISD::SETEQ:   return X86::COND_E;
    case ISD::SETOLT:              // flipped
@@ -4379,11 +4383,12 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
      // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
      //   combine either bitwise AND or insert of float 0.0 to set these bits.
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    // Wrap the scalar in a v4f32 SCALAR_TO_VECTOR node before passing it to INSERTPS.
+    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
-  } else if (EVT == MVT::i32) {
-    // InsertPS works with constant index.
-    if (isa<ConstantSDNode>(N2))
-      return Op;
+  } else if (EVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+    // PINSR* works with constant index.
+    return Op;
    }
    return SDValue();
  }
@@ -4453,7 +4458,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
    
-  if (Subtarget->is64Bit() &&
+  if (Subtarget->isPICStyleRIPRel() &&
        getTargetMachine().getCodeModel() == CodeModel::Small)
      WrapperKind = X86ISD::WrapperRIP;
    else if (Subtarget->isPICStyleGOT())
@@ -4485,7 +4490,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
    
-  if (Subtarget->is64Bit() &&
+  if (Subtarget->isPICStyleRIPRel() &&
        getTargetMachine().getCodeModel() == CodeModel::Small)
      WrapperKind = X86ISD::WrapperRIP;
    else if (Subtarget->isPICStyleGOT())
@@ -4517,7 +4522,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (Subtarget->is64Bit() &&
+  if (Subtarget->isPICStyleRIPRel() &&
        getTargetMachine().getCodeModel() == CodeModel::Small)
      WrapperKind = X86ISD::WrapperRIP;
    else if (Subtarget->isPICStyleGOT())
@@ -4561,7 +4566,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
      Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
    }
    
-  if (Subtarget->is64Bit() &&
+  if (Subtarget->isPICStyleRIPRel() &&
        getTargetMachine().getCodeModel() == CodeModel::Small)
      Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
    else
@@ -4715,7 +4720,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
                                 Subtarget->is64Bit());
    }
    
-  assert(0 && "Unreachable");
+  llvm_unreachable("Unreachable");
    return SDValue();
  }
  
@@ -4880,20 +4885,23 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
    */
  
    DebugLoc dl = Op.getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
  
    // Build some magic constants.
    std::vector<Constant*> CV0;
-  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
-  Constant *C0 = ConstantVector::get(CV0);
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+  Constant *C0 = Context->getConstantVector(CV0);
    SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
  
    std::vector<Constant*> CV1;
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
-  Constant *C1 = ConstantVector::get(CV1);
+  CV1.push_back(
+    Context->getConstantFP(APFloat(APInt(64, 0x4530000000000000ULL))));
+  CV1.push_back(
+    Context->getConstantFP(APFloat(APInt(64, 0x4330000000000000ULL))));
+  Constant *C1 = Context->getConstantVector(CV1);
    SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
  
    SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
@@ -5038,7 +5046,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
    
    unsigned Opc;
    switch (DstTy.getSimpleVT()) {
-  default: assert(0 && "Invalid FP_TO_SINT to lower!");
+  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
@@ -5097,6 +5105,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5104,17 +5113,17 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
      EltVT = VT.getVectorElementType();
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+    Constant *C = Context->getConstantFP(APFloat(APInt(64, ~(1ULL << 63))));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+    Constant *C = Context->getConstantFP(APFloat(APInt(32, ~(1U << 31))));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                 PseudoSourceValue::getConstantPool(), 0,
@@ -5123,6 +5132,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5133,17 +5143,17 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
    }
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+    Constant *C = Context->getConstantFP(APFloat(APInt(64, 1ULL << 63)));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+    Constant *C = Context->getConstantFP(APFloat(APInt(32, 1U << 31)));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                 PseudoSourceValue::getConstantPool(), 0,
@@ -5160,6 +5170,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    DebugLoc dl = Op.getDebugLoc();
@@ -5183,15 +5194,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // First get the sign bit of second operand.
    std::vector<Constant*> CV;
    if (SrcVT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 1ULL << 63))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 1U << 31))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
                                  PseudoSourceValue::getConstantPool(), 0,
@@ -5212,15 +5223,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // Clear first operand sign bit.
    CV.clear();
    if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, ~(1ULL << 63)))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, ~(1U << 31)))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
    }
-  C = ConstantVector::get(CV);
+  C = Context->getConstantVector(CV);
    CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                  PseudoSourceValue::getConstantPool(), 0,
@@ -5461,7 +5472,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
          NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
          return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
        }
-      assert(0 && "Illegal FP comparison");
+      llvm_unreachable("Illegal FP comparison");
      }
      // Handle all other FP comparisons here.
      return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
@@ -5475,8 +5486,11 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  
    switch (VT.getSimpleVT()) {
    default: break;
+  case MVT::v8i8:
    case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+  case MVT::v4i16:
    case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+  case MVT::v2i32:
    case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
    case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
    }
@@ -6247,7 +6261,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
        case Intrinsic::x86_mmx_psrai_d:
          NewIntNo = Intrinsic::x86_mmx_psra_d;
          break;
-      default: LLVM_UNREACHABLE("Impossible intrinsic");  // Can't reach here.
+      default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
        }
        break;
      }
@@ -6397,7 +6411,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
  
      switch (CC) {
      default:
-      assert(0 && "Unsupported calling convention");
+      llvm_unreachable("Unsupported calling convention");
      case CallingConv::C:
      case CallingConv::X86_StdCall: {
        // Pass 'nest' parameter in ECX.
@@ -6646,7 +6660,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
    DebugLoc dl = Op.getDebugLoc();
  
    switch (Op.getOpcode()) {
-  default: assert(0 && "Unknown ovf instruction!");
+  default: llvm_unreachable("Unknown ovf instruction!");
    case ISD::SADDO:
      // A subtract of one will be selected as a INC. Note that INC doesn't
      // set CF, so we can't do this for UADDO.
@@ -6768,7 +6782,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  ///
  SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
    switch (Op.getOpcode()) {
-  default: assert(0 && "Should not custom lower this!");
+  default: llvm_unreachable("Should not custom lower this!");
    case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
    case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
    case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
@@ -7616,7 +7630,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      // Get the X86 opcode to use.
      unsigned Opc;
      switch (MI->getOpcode()) {
-    default: assert(0 && "illegal opcode!");
+    default: llvm_unreachable("illegal opcode!");
      case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
      case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
      case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
@@ -8355,7 +8369,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
    SDValue  ValOp = N->getOperand(0);
    switch (N->getOpcode()) {
    default:
-    assert(0 && "Unknown shift opcode!");
+    llvm_unreachable("Unknown shift opcode!");
      break;
    case ISD::SHL:
      if (VT == MVT::v2i64)
@@ -8651,6 +8665,100 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  //                           X86 Inline Assembly Support
  //===----------------------------------------------------------------------===//
  
+static bool LowerToBSwap(CallInst *CI) {
+  // Rewrites CI into a call to llvm.bswap on operand 1.  Returns true if CI was
+  // replaced and erased, false if it was left untouched.
+  // FIXME: verify that we are targeting a 486 or better; otherwise the bswap is
+  // lowered to logical ops, not bswap asm (486 or lower unsupported for now).
+  
+  // Verify this is a simple bswap (2 operands, integer type matching operand 1).
+  if (CI->getNumOperands() != 2 ||
+      CI->getType() != CI->getOperand(1)->getType() ||
+      !CI->getType()->isInteger())
+    return false;
+  
+  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty || Ty->getBitWidth() % 16 != 0)  // width must be a multiple of 16 bits
+    return false;
+  
+  // Okay, we can do this xform: build a call to llvm.bswap overloaded on Ty.
+  const Type *Tys[] = { Ty };
+  Module *M = CI->getParent()->getParent()->getParent();
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  
+  Value *Op = CI->getOperand(1);
+  Op = CallInst::Create(Int, Op, CI->getName(), CI);  // new call reuses CI's name
+  
+  CI->replaceAllUsesWith(Op);  // redirect every user of the old call
+  CI->eraseFromParent();       // CI must not be used by the caller after this
+  return true;
+}
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {  // Returns LowerToBSwap's result for recognized byte-swap asm, else false.
+  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+  std::string AsmStr = IA->getAsmString();
+
+  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+  std::vector<std::string> AsmPieces;
+  SplitString(AsmStr, AsmPieces, "\n");  // Split on newlines.  TODO: ';' as separator too?
+
+  switch (AsmPieces.size()) {
+  default: return false;  // Only 1-piece and 3-piece asm strings are examined.
+  case 1:
+    AsmStr = AsmPieces[0];
+    AsmPieces.clear();
+    SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
+
+    // bswap $0 (also bswapq/bswapl, with operand $0 or ${0:q})
+    if (AsmPieces.size() == 2 &&
+        (AsmPieces[0] == "bswap" ||
+         AsmPieces[0] == "bswapq" ||
+         AsmPieces[0] == "bswapl") &&
+        (AsmPieces[1] == "$0" ||
+         AsmPieces[1] == "${0:q}")) {
+      // No need to check constraints, nothing other than the equivalent of
+      // "=r,0" would be valid here.
+      return LowerToBSwap(CI);
+    }
+    // rorw $$8, ${0:w}  -->  llvm.bswap.i16 (requires this exact constraint string)
+    if (CI->getType() == Type::Int16Ty &&
+        AsmPieces.size() == 3 &&
+        AsmPieces[0] == "rorw" &&
+        AsmPieces[1] == "$$8," &&
+        AsmPieces[2] == "${0:w}" &&
+        IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+      return LowerToBSwap(CI);
+    }
+    break;
+  case 3:
+    if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
+      std::vector<std::string> Words;
+      SplitString(AsmPieces[0], Words, " \t");
+      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+        Words.clear();
+        SplitString(AsmPieces[1], Words, " \t");
+        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+          Words.clear();
+          SplitString(AsmPieces[2], Words, " \t,");  // ',' also separates the xchgl operands
+          if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+              Words[2] == "%edx") {
+            return LowerToBSwap(CI);
+          }
+        }
+      }
+    }
+    break;
+  }
+  return false;  // No pattern matched; CI was not modified.
+}
+
+
+
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
  X86TargetLowering::ConstraintType
@@ -8833,7 +8941,37 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
      // FIXME: not handling fp-stack yet!
      switch (Constraint[0]) {      // GCC X86 Constraint Letters
      default: break;  // Unknown constraint letter
-    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
+    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+      if (Subtarget->is64Bit()) {
+        if (VT == MVT::i32)
+          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
+                                       X86::ESI, X86::EDI, X86::R8D, X86::R9D,
+                                       X86::R10D,X86::R11D,X86::R12D,
+                                       X86::R13D,X86::R14D,X86::R15D,
+                                       X86::EBP, X86::ESP, 0);
+        else if (VT == MVT::i16)
+          return make_vector<unsigned>(X86::AX,  X86::DX,  X86::CX, X86::BX,
+                                       X86::SI,  X86::DI,  X86::R8W,X86::R9W,
+                                       X86::R10W,X86::R11W,X86::R12W,
+                                       X86::R13W,X86::R14W,X86::R15W,
+                                       X86::BP,  X86::SP, 0);
+        else if (VT == MVT::i8)
+          return make_vector<unsigned>(X86::AL,  X86::DL,  X86::CL, X86::BL,
+                                       X86::SIL, X86::DIL, X86::R8B,X86::R9B,
+                                       X86::R10B,X86::R11B,X86::R12B,
+                                       X86::R13B,X86::R14B,X86::R15B,
+                                       X86::BPL, X86::SPL, 0);
+
+        else if (VT == MVT::i64)
+          return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
+                                       X86::RSI, X86::RDI, X86::R8,  X86::R9,
+                                       X86::R10, X86::R11, X86::R12,
+                                       X86::R13, X86::R14, X86::R15,
+                                       X86::RBP, X86::RSP, 0);
+
+        break;
+      }
+      // 32-bit fallthrough 
      case 'Q':   // Q_REGS
        if (VT == MVT::i32)
          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);