Revert the ConstantInt constructors back to their 2.5 forms where possible, thanks...

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a2fedaf924a075574d55711e196b4b251a878cd4..15c6dfead43c5bb42e06d84e8e7ca635a1330f9f 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22,7 +22,9 @@
  #include "llvm/GlobalAlias.h"
  #include "llvm/GlobalVariable.h"
  #include "llvm/Function.h"
+#include "llvm/Instructions.h"
  #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/VectorExtras.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -64,7 +66,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setShiftAmountType(MVT::i8);
    setBooleanContents(ZeroOrOneBooleanContent);
    setSchedulingPreference(SchedulingForRegPressure);
-  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
    setStackPointerRegisterToSaveRestore(X86StackPtr);
  
    if (Subtarget->isTargetDarwin()) {
@@ -639,6 +640,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
      setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
      setOperationAction(ISD::SELECT,             MVT::v1i64, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
    }
  
    if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1048,7 +1052,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
    SmallVector<CCValAssign, 16> RVLocs;
    unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
    bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext());
    CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
  
    // If this is the first return lowered for this function, add the regs to the
@@ -1175,7 +1179,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
    bool isVarArg = TheCall->isVarArg();
    bool Is64Bit = Subtarget->is64Bit();
    CCState CCInfo(CallingConv, isVarArg, getTargetMachine(),
-                 RVLocs, DAG.getContext());
+                 RVLocs, *DAG.getContext());
    CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
  
    SmallVector<SDValue, 8> ResultVals;
@@ -1384,7 +1388,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  
    // Assign locations to all of the incoming arguments.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
  
    SmallVector<SDValue, 8> ArgValues;
@@ -1679,7 +1683,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  
    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext());
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
  
    // Get a count of how many bytes are to be pushed on the stack.
@@ -4379,11 +4383,12 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
      // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
      //   combine either bitwise AND or insert of float 0.0 to set these bits.
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    // Create this as a scalar to vector..
+    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
-  } else if (EVT == MVT::i32) {
-    // InsertPS works with constant index.
-    if (isa<ConstantSDNode>(N2))
-      return Op;
+  } else if (EVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+    // PINSR* works with constant index.
+    return Op;
    }
    return SDValue();
  }
@@ -4880,20 +4885,23 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
    */
  
    DebugLoc dl = Op.getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
  
    // Build some magic constants.
    std::vector<Constant*> CV0;
-  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
-  CV0.push_back(ConstantInt::get(APInt(32, 0)));
-  Constant *C0 = ConstantVector::get(CV0);
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+  Constant *C0 = Context->getConstantVector(CV0);
    SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
  
    std::vector<Constant*> CV1;
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
-  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
-  Constant *C1 = ConstantVector::get(CV1);
+  CV1.push_back(
+    Context->getConstantFP(APFloat(APInt(64, 0x4530000000000000ULL))));
+  CV1.push_back(
+    Context->getConstantFP(APFloat(APInt(64, 0x4330000000000000ULL))));
+  Constant *C1 = Context->getConstantVector(CV1);
    SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
  
    SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
@@ -5097,6 +5105,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5104,17 +5113,17 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
      EltVT = VT.getVectorElementType();
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+    Constant *C = Context->getConstantFP(APFloat(APInt(64, ~(1ULL << 63))));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+    Constant *C = Context->getConstantFP(APFloat(APInt(32, ~(1U << 31))));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                 PseudoSourceValue::getConstantPool(), 0,
@@ -5123,6 +5132,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
    MVT VT = Op.getValueType();
    MVT EltVT = VT;
@@ -5133,17 +5143,17 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
    }
    std::vector<Constant*> CV;
    if (EltVT == MVT::f64) {
-    Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+    Constant *C = Context->getConstantFP(APFloat(APInt(64, 1ULL << 63)));
      CV.push_back(C);
      CV.push_back(C);
    } else {
-    Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+    Constant *C = Context->getConstantFP(APFloat(APInt(32, 1U << 31)));
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                 PseudoSourceValue::getConstantPool(), 0,
@@ -5160,6 +5170,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
  }
  
  SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+  LLVMContext *Context = DAG.getContext();
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    DebugLoc dl = Op.getDebugLoc();
@@ -5183,15 +5194,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // First get the sign bit of second operand.
    std::vector<Constant*> CV;
    if (SrcVT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 1ULL << 63))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 1U << 31))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
    }
-  Constant *C = ConstantVector::get(CV);
+  Constant *C = Context->getConstantVector(CV);
    SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
                                  PseudoSourceValue::getConstantPool(), 0,
@@ -5212,15 +5223,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    // Clear first operand sign bit.
    CV.clear();
    if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, ~(1ULL << 63)))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(64, 0))));
    } else {
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
-    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, ~(1U << 31)))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
+    CV.push_back(Context->getConstantFP(APFloat(APInt(32, 0))));
    }
-  C = ConstantVector::get(CV);
+  C = Context->getConstantVector(CV);
    CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
    SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                  PseudoSourceValue::getConstantPool(), 0,
@@ -5475,8 +5486,11 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  
    switch (VT.getSimpleVT()) {
    default: break;
+  case MVT::v8i8:
    case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+  case MVT::v4i16:
    case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+  case MVT::v2i32:
    case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
    case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
    }
@@ -8651,6 +8665,100 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  //                           X86 Inline Assembly Support
  //===----------------------------------------------------------------------===//
  
+static bool LowerToBSwap(CallInst *CI) {
+  // FIXME: this should verify that we are targetting a 486 or better.  If not,
+  // we will turn this bswap into something that will be lowered to logical ops
+  // instead of emitting the bswap asm.  For now, we don't support 486 or lower
+  // so don't worry about this.
+  
+  // Verify this is a simple bswap.
+  if (CI->getNumOperands() != 2 ||
+      CI->getType() != CI->getOperand(1)->getType() ||
+      !CI->getType()->isInteger())
+    return false;
+  
+  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty || Ty->getBitWidth() % 16 != 0)
+    return false;
+  
+  // Okay, we can do this xform, do so now.
+  const Type *Tys[] = { Ty };
+  Module *M = CI->getParent()->getParent()->getParent();
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  
+  Value *Op = CI->getOperand(1);
+  Op = CallInst::Create(Int, Op, CI->getName(), CI);
+  
+  CI->replaceAllUsesWith(Op);
+  CI->eraseFromParent();
+  return true;
+}
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+  std::string AsmStr = IA->getAsmString();
+
+  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+  std::vector<std::string> AsmPieces;
+  SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?
+
+  switch (AsmPieces.size()) {
+  default: return false;
+  case 1:
+    AsmStr = AsmPieces[0];
+    AsmPieces.clear();
+    SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
+
+    // bswap $0
+    if (AsmPieces.size() == 2 &&
+        (AsmPieces[0] == "bswap" ||
+         AsmPieces[0] == "bswapq" ||
+         AsmPieces[0] == "bswapl") &&
+        (AsmPieces[1] == "$0" ||
+         AsmPieces[1] == "${0:q}")) {
+      // No need to check constraints, nothing other than the equivalent of
+      // "=r,0" would be valid here.
+      return LowerToBSwap(CI);
+    }
+    // rorw $$8, ${0:w}  -->  llvm.bswap.i16
+    if (CI->getType() == Type::Int16Ty &&
+        AsmPieces.size() == 3 &&
+        AsmPieces[0] == "rorw" &&
+        AsmPieces[1] == "$$8," &&
+        AsmPieces[2] == "${0:w}" &&
+        IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+      return LowerToBSwap(CI);
+    }
+    break;
+  case 3:
+    if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
+      std::vector<std::string> Words;
+      SplitString(AsmPieces[0], Words, " \t");
+      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+        Words.clear();
+        SplitString(AsmPieces[1], Words, " \t");
+        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+          Words.clear();
+          SplitString(AsmPieces[2], Words, " \t,");
+          if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+              Words[2] == "%edx") {
+            return LowerToBSwap(CI);
+          }
+        }
+      }
+    }
+    break;
+  }
+  return false;
+}
+
+
+
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
  X86TargetLowering::ConstraintType
@@ -8833,7 +8941,37 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
      // FIXME: not handling fp-stack yet!
      switch (Constraint[0]) {      // GCC X86 Constraint Letters
      default: break;  // Unknown constraint letter
-    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
+    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+      if (Subtarget->is64Bit()) {
+        if (VT == MVT::i32)
+          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
+                                       X86::ESI, X86::EDI, X86::R8D, X86::R9D,
+                                       X86::R10D,X86::R11D,X86::R12D,
+                                       X86::R13D,X86::R14D,X86::R15D,
+                                       X86::EBP, X86::ESP, 0);
+        else if (VT == MVT::i16)
+          return make_vector<unsigned>(X86::AX,  X86::DX,  X86::CX, X86::BX,
+                                       X86::SI,  X86::DI,  X86::R8W,X86::R9W,
+                                       X86::R10W,X86::R11W,X86::R12W,
+                                       X86::R13W,X86::R14W,X86::R15W,
+                                       X86::BP,  X86::SP, 0);
+        else if (VT == MVT::i8)
+          return make_vector<unsigned>(X86::AL,  X86::DL,  X86::CL, X86::BL,
+                                       X86::SIL, X86::DIL, X86::R8B,X86::R9B,
+                                       X86::R10B,X86::R11B,X86::R12B,
+                                       X86::R13B,X86::R14B,X86::R15B,
+                                       X86::BPL, X86::SPL, 0);
+
+        else if (VT == MVT::i64)
+          return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
+                                       X86::RSI, X86::RDI, X86::R8,  X86::R9,
+                                       X86::R10, X86::R11, X86::R12,
+                                       X86::R13, X86::R14, X86::R15,
+                                       X86::RBP, X86::RSP, 0);
+
+        break;
+      }
+      // 32-bit fallthrough 
      case 'Q':   // Q_REGS
        if (VT == MVT::i32)
          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);