X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=f8ff6a055c3239aa7c1f6429015d4a9e2162672c;hb=cdbe4d3ebe50bd3709b08ee59bb09ccf3a1fbd47;hp=dfc9680f987bed8fc2d1d469f0d869a8e312e01b;hpb=ab5b49d92e8bdcad5d05e96a7b8390944de7ac1a;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dfc9680f987..f8ff6a055c3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86CodeEmitter.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" #include "X86MachineFunctionInfo.h" @@ -20,6 +21,7 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/ADT/VectorExtras.h" @@ -33,6 +35,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ParameterAttributes.h" using namespace llvm; X86TargetLowering::X86TargetLowering(TargetMachine &TM) @@ -41,6 +44,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) X86ScalarSSE = Subtarget->hasSSE2(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + RegInfo = TM.getRegisterInfo(); + // Set up the TargetLowering object. // X86 is weird, it always uses i8 for shift amounts and setcc results. @@ -196,10 +201,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) } // X86 ret instruction may pop stack. setOperationAction(ISD::RET , MVT::Other, Custom); + if (!Subtarget->is64Bit()) + setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + // Darwin ABI issue. 
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); @@ -224,6 +233,23 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) !Subtarget->isTargetCygMing()) setOperationAction(ISD::LABEL, MVT::Other, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + if (Subtarget->is64Bit()) { + // FIXME: Verify + setExceptionPointerRegister(X86::RAX); + setExceptionSelectorRegister(X86::RDX); + } else { + setExceptionPointerRegister(X86::EAX); + setExceptionSelectorRegister(X86::EDX); + } + + setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand); + setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand); + setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAARG , MVT::Other, Expand); @@ -237,7 +263,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + if (Subtarget->isTargetCygMing()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); if (X86ScalarSSE) { // Set up the FP register classes. @@ -271,11 +300,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addLegalFPImmediate(+0.0); // xorps / xorpd } else { // Set up the FP register classes. - addRegisterClass(MVT::f64, X86::RFPRegisterClass); + addRegisterClass(MVT::f64, X86::RFP64RegisterClass); + addRegisterClass(MVT::f32, X86::RFP32RegisterClass); setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::UNDEF, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); @@ -283,19 +315,24 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); addLegalFPImmediate(+0.0); // FLD0 addLegalFPImmediate(+1.0); // FLD1 addLegalFPImmediate(-0.0); // FLD0/FCHS addLegalFPImmediate(-1.0); // FLD1/FCHS } + // Long double always uses X87. + addRegisterClass(MVT::f80, X86::RFP80RegisterClass); + // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. 
- for (unsigned VT = (unsigned)MVT::Vector + 1; - VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand); setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand); setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand); @@ -308,6 +345,13 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand); } if (Subtarget->hasMMX()) { @@ -321,6 +365,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::ADD, MVT::v8i8, Legal); setOperationAction(ISD::ADD, MVT::v4i16, Legal); setOperationAction(ISD::ADD, MVT::v2i32, Legal); + setOperationAction(ISD::ADD, MVT::v1i64, Legal); setOperationAction(ISD::SUB, MVT::v8i8, Legal); setOperationAction(ISD::SUB, MVT::v4i16, Legal); @@ -361,13 +406,20 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v1i64, Legal); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); } if (Subtarget->hasSSE1()) { @@ -377,6 +429,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); @@ -404,6 +458,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::FSUB, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, 
MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); @@ -476,7 +532,8 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CC, getTargetMachine(), RVLocs); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); @@ -525,7 +582,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); } - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other); SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())}; Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); Chain = Value.getValue(1); @@ -556,7 +613,8 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallingConv, getTargetMachine(), RVLocs); + bool isVarArg = cast(TheCall->getOperand(2))->getValue() != 0; + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); @@ -575,7 +633,7 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, // before the fp stackifier runs. // Copy ST0 into an RFP register with FP_GET_RESULT. - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag); SDOperand GROps[] = { Chain, InFlag }; SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); Chain = RetVal.getValue(1); @@ -597,11 +655,6 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0); Chain = RetVal.getValue(1); } - - if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE) - // FIXME: we would really like to remember that this FP_ROUND - // operation is okay to eliminate if we allow excess FP precision. - RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); ResultVals.push_back(RetVal); } @@ -641,8 +694,8 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); SmallVector ArgValues; @@ -720,7 +773,8 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. ReturnAddrIndex = 0; // No return address slot generated yet. - MF.getInfo()->setBytesToPopOnReturn(BytesToPopOnReturn); + MF.getInfo() + ->setBytesToPopOnReturn(BytesToPopOnReturn); // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), @@ -737,7 +791,7 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, // Analyze operands of the call, assigning locations to each operand. 
SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); // Get a count of how many bytes are to be pushed on the stack. @@ -892,11 +946,12 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); SmallVector ArgValues; @@ -964,7 +1019,8 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. BytesCallerReserves = 0; - MF.getInfo()->setBytesToPopOnReturn(BytesToPopOnReturn); + MF.getInfo() + ->setBytesToPopOnReturn(BytesToPopOnReturn); // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), @@ -975,11 +1031,12 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; SDOperand Callee = Op.getOperand(4); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall); // Get a count of how many bytes are to be pushed on the stack. @@ -1128,8 +1185,8 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); SmallVector ArgValues; @@ -1155,7 +1212,11 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { RC = X86::FR64RegisterClass; else { assert(MVT::isVector(RegVT)); - RC = X86::VR128RegisterClass; + if (MVT::getSizeInBits(RegVT) == 64) { + RC = X86::GR64RegisterClass; // MMX values are passed in GPRs. + RegVT = MVT::i64; + } else + RC = X86::VR128RegisterClass; } unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); @@ -1174,6 +1235,11 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); + // Handle MMX values passed in GPRs. + if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass && + MVT::getSizeInBits(RegVT) == 64) + ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); + ArgValues.push_back(ArgValue); } else { assert(VA.isMemLoc()); @@ -1255,7 +1321,7 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, // Analyze operands of the call, assigning locations to each operand. 
SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); // Get a count of how many bytes are to be pushed on the stack. @@ -1339,8 +1405,8 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, // We should use extra load for direct calls to dllimported functions in // non-JIT mode. if (getTargetMachine().getCodeModel() != CodeModel::Large - && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), - getTargetMachine(), true)) + && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), + getTargetMachine(), true)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) if (getTargetMachine().getCodeModel() != CodeModel::Large) @@ -1515,7 +1581,7 @@ static bool isUndefOrEqual(SDOperand Op, unsigned Val) { bool X86::isPSHUFDMask(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR); - if (N->getNumOperands() != 4) + if (N->getNumOperands() != 2 && N->getNumOperands() != 4) return false; // Check if the value doesn't reference the second vector. @@ -1523,7 +1589,7 @@ bool X86::isPSHUFDMask(SDNode *N) { SDOperand Arg = N->getOperand(i); if (Arg.getOpcode() == ISD::UNDEF) continue; assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast(Arg)->getValue() >= 4) + if (cast(Arg)->getValue() >= e) return false; } @@ -1602,7 +1668,7 @@ bool X86::isSHUFPMask(SDNode *N) { return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); } -/// isCommutedSHUFP - Returns true if the shuffle mask is except +/// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. @@ -1763,7 +1829,7 @@ bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR); unsigned NumElems = N->getNumOperands(); - if (NumElems != 4 && NumElems != 8 && NumElems != 16) + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { @@ -1779,6 +1845,29 @@ bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { return true; } +/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form +/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, +/// <2, 2, 3, 3> +bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + SDOperand BitI = N->getOperand(i); + SDOperand BitI1 = N->getOperand(i + 1); + + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } + + return true; +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. @@ -1894,6 +1983,16 @@ bool X86::isMOVSLDUPMask(SDNode *N) { return HasHi; } +/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a identity operation on the LHS or RHS. 
+static bool isIdentityMask(SDNode *N, bool RHS = false) { + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 0; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + return false; + return true; +} + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies /// a splat of a single element. static bool isSplatMask(SDNode *N) { @@ -2046,7 +2145,7 @@ static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); MVT::ValueType MaskVT = Mask.getValueType(); - MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); unsigned NumElems = Mask.getNumOperands(); SmallVector MaskVec; @@ -2137,7 +2236,7 @@ static bool isSplatVector(SDNode *N) { /// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved /// to an undef. static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) return false; SDOperand V1 = N->getOperand(0); @@ -2157,6 +2256,61 @@ static bool isUndefShuffle(SDNode *N) { return true; } +/// isZeroNode - Returns true if Elt is a constant zero or a floating point +/// constant +0.0. +static inline bool isZeroNode(SDOperand Elt) { + return ((isa(Elt) && + cast(Elt)->getValue() == 0) || + (isa(Elt) && + cast(Elt)->isExactlyValue(0.0))); +} + +/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved +/// to an zero vector. +static bool isZeroShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + SDOperand V1 = N->getOperand(0); + SDOperand V2 = N->getOperand(1); + SDOperand Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Idx = cast(Arg)->getValue(); + if (Idx < NumElems) { + unsigned Opc = V1.Val->getOpcode(); + if (Opc == ISD::UNDEF) + continue; + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V1.Val->getOperand(Idx))) + return false; + } else if (Idx >= NumElems) { + unsigned Opc = V2.Val->getOpcode(); + if (Opc == ISD::UNDEF) + continue; + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V2.Val->getOperand(Idx - NumElems))) + return false; + } + } + } + return true; +} + +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { + assert(MVT::isVector(VT) && "Expected a vector type"); + unsigned NumElems = MVT::getVectorNumElements(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + bool isFP = MVT::isFloatingPoint(EVT); + SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); + SmallVector ZeroVec(NumElems, Zero); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); +} + /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { @@ -2187,7 +2341,7 @@ static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { /// operation of specified width. 
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); @@ -2200,7 +2354,7 @@ static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { /// of specified width. static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { MaskVec.push_back(DAG.getConstant(i, BaseVT)); @@ -2213,7 +2367,7 @@ static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { /// of specified width. static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); unsigned Half = NumElems/2; SmallVector MaskVec; for (unsigned i = 0; i != Half; ++i) { @@ -2223,18 +2377,6 @@ static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); } -/// getZeroVector - Returns a vector of specified type with all zero elements. -/// -static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { - assert(MVT::isVector(VT) && "Expected a vector type"); - unsigned NumElems = getVectorNumElements(VT); - MVT::ValueType EVT = MVT::getVectorBaseType(VT); - bool isFP = MVT::isFloatingPoint(EVT); - SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); - SmallVector ZeroVec(NumElems, Zero); - return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); -} - /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. /// static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { @@ -2256,23 +2398,14 @@ static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); } -/// isZeroNode - Returns true if Elt is a constant zero or a floating point -/// constant +0.0. -static inline bool isZeroNode(SDOperand Elt) { - return ((isa(Elt) && - cast(Elt)->getValue() == 0) || - (isa(Elt) && - cast(Elt)->isExactlyValue(0.0))); -} - /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified -/// vector and zero or undef vector. +/// vector of zero or undef vector. static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, unsigned NumElems, unsigned Idx, bool isZero, SelectionDAG &DAG) { SDOperand V1 = isZero ? 
getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); SDOperand Zero = DAG.getConstant(0, EVT); SmallVector MaskVec(NumElems, Zero); MaskVec[Idx] = DAG.getConstant(NumElems, EVT); @@ -2364,13 +2497,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { return Op; MVT::ValueType VT = Op.getValueType(); - MVT::ValueType EVT = MVT::getVectorBaseType(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); unsigned EVTBits = MVT::getSizeInBits(EVT); unsigned NumElems = Op.getNumOperands(); unsigned NumZero = 0; unsigned NumNonZero = 0; unsigned NonZeros = 0; + unsigned NumNonZeroImms = 0; std::set Values; for (unsigned i = 0; i < NumElems; ++i) { SDOperand Elt = Op.getOperand(i); @@ -2381,13 +2515,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { else { NonZeros |= (1 << i); NumNonZero++; + if (Elt.getOpcode() == ISD::Constant || + Elt.getOpcode() == ISD::ConstantFP) + NumNonZeroImms++; } } } - if (NumNonZero == 0) - // Must be a mix of zero and undef. Return a zero vector. - return getZeroVector(VT, DAG); + if (NumNonZero == 0) { + if (NumZero == 0) + // All undef vector. Return an UNDEF. + return DAG.getNode(ISD::UNDEF, VT); + else + // A mix of zero and undef. Return a zero vector. + return getZeroVector(VT, DAG); + } // Splat is obviously ok. Let legalizer expand it to a shuffle. if (Values.size() == 1) @@ -2408,7 +2550,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, DAG); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i++) MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); @@ -2419,18 +2561,23 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { } } - // Let legalizer expand 2-wide build_vector's. + // A vector full of immediates; various special cases are already + // handled, so this is best done with a single constant-pool load. + if (NumNonZero == NumNonZeroImms) + return SDOperand(); + + // Let legalizer expand 2-wide build_vectors. if (EVTBits == 64) return SDOperand(); // If element VT is < 32 bits, convert it to inserts into a zero vector. 
- if (EVTBits == 8) { + if (EVTBits == 8 && NumElems == 16) { SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, *this); if (V.Val) return V; } - if (EVTBits == 16) { + if (EVTBits == 16 && NumElems == 8) { SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, *this); if (V.Val) return V; @@ -2477,7 +2624,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) return V[0]; MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) @@ -2533,6 +2680,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { if (isUndefShuffle(Op.Val)) return DAG.getNode(ISD::UNDEF, VT); + if (isZeroShuffle(Op.Val)) + return getZeroVector(VT, DAG); + + if (isIdentityMask(PermMask.Val)) + return V1; + else if (isIdentityMask(PermMask.Val, true)) + return V2; + if (isSplatMask(PermMask.Val)) { if (NumElems <= 4) return Op; // Promote it to a v4i32 splat. @@ -2578,6 +2733,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || + X86::isUNPCKH_v_undef_Mask(PermMask.Val) || X86::isUNPCKLMask(PermMask.Val) || X86::isUNPCKHMask(PermMask.Val)) return Op; @@ -2606,6 +2762,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { // Commute is back and try unpck* again. Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || + X86::isUNPCKH_v_undef_Mask(PermMask.Val) || X86::isUNPCKLMask(PermMask.Val) || X86::isUNPCKHMask(PermMask.Val)) return Op; @@ -2613,7 +2770,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { // If VT is integer, try PSHUF* first, then SHUFP*. if (MVT::isInteger(VT)) { - if (X86::isPSHUFDMask(PermMask.Val) || + // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically + // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. + if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) && + X86::isPSHUFDMask(PermMask.Val)) || X86::isPSHUFHWMask(PermMask.Val) || X86::isPSHUFLWMask(PermMask.Val)) { if (V2.getOpcode() != ISD::UNDEF) @@ -2622,13 +2782,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { return Op; } - if (X86::isSHUFPMask(PermMask.Val)) + if (X86::isSHUFPMask(PermMask.Val) && + MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. return Op; // Handle v8i16 shuffle high / low shuffle node pair. if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0; i != 4; ++i) MaskVec.push_back(PermMask.getOperand(i)); @@ -2659,9 +2820,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } } - if (NumElems == 4) { + if (NumElems == 4 && + // Don't do this for MMX. 
+ MVT::getSizeInBits(VT) != 64) { MVT::ValueType MaskVT = PermMask.getValueType(); - MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector, 8> Locs; Locs.reserve(NumElems); SmallVector Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); @@ -2786,10 +2949,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // SHUFPS the element to the lowest double word, then movss. MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -2807,8 +2970,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // to a f64mem, the whole operation is folded into a single MOVHPDmr. MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -2825,7 +2988,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. MVT::ValueType VT = Op.getValueType(); - MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); + MVT::ValueType BaseVT = MVT::getVectorElementType(VT); SDOperand N0 = Op.getOperand(0); SDOperand N1 = Op.getOperand(1); SDOperand N2 = Op.getOperand(2); @@ -2833,7 +2996,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); if (N2.getValueType() != MVT::i32) - N2 = DAG.getConstant(cast(N2)->getValue(), MVT::i32); + N2 = DAG.getConstant(cast(N2)->getValue(),getPointerTy()); return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); } else if (MVT::getSizeInBits(BaseVT) == 32) { unsigned Idx = cast(N2)->getValue(); @@ -2841,7 +3004,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // Use a movss. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; MaskVec.push_back(DAG.getConstant(4, BaseVT)); for (unsigned i = 1; i <= 3; ++i) @@ -2853,17 +3016,10 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // Use two pinsrw instructions to insert a 32 bit value. 
Idx <<= 1; if (MVT::isFloatingPoint(N1.getValueType())) { - if (ISD::isNON_EXTLoad(N1.Val)) { - // Just load directly from f32mem to GR32. - LoadSDNode *LD = cast(N1); - N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset()); - } else { - N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); - N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); - N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, - DAG.getConstant(0, getPointerTy())); - } + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); + N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); + N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, + DAG.getConstant(0, getPointerTy())); } N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, @@ -2932,6 +3088,81 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { return Result; } +// Lower ISD::GlobalTLSAddress using the "general dynamic" model +static SDOperand +LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const MVT::ValueType PtrVT) { + SDOperand InFlag; + SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, + PtrVT), InFlag); + InFlag = Chain.getValue(1); + + // emit leal symbol@TLSGD(,%ebx,1), %eax + SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); + SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + GA->getValueType(0), + GA->getOffset()); + SDOperand Ops[] = { Chain, TGA, InFlag }; + SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); + InFlag = Result.getValue(2); + Chain = Result.getValue(1); + + // call ___tls_get_addr. This function receives its argument in + // the register EAX. + Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); + InFlag = Chain.getValue(1); + + NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDOperand Ops1[] = { Chain, + DAG.getTargetExternalSymbol("___tls_get_addr", + PtrVT), + DAG.getRegister(X86::EAX, PtrVT), + DAG.getRegister(X86::EBX, PtrVT), + InFlag }; + Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); + InFlag = Chain.getValue(1); + + return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or +// "local exec" model. +static SDOperand +LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const MVT::ValueType PtrVT) { + // Get the Thread Pointer + SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); + // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial + // exec) + SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + GA->getValueType(0), + GA->getOffset()); + SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); + + if (GA->getGlobal()->isDeclaration()) // initial exec TLS model + Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); + + // The address of the thread local variable is the add of the thread + // pointer with the offset of the variable. 
+ return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); +} + +SDOperand +X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { + // TODO: implement the "local dynamic" model + // TODO: implement the "initial exec"model for pic executables + assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() && + "TLS not implemented for non-ELF and 64-bit targets"); + GlobalAddressSDNode *GA = cast(Op); + // If the relocation model is PIC, use the "General Dynamic" TLS Model, + // otherwise use the "Local Exec"TLS Model + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy()); + else + return LowerToTLSExecModel(GA, DAG, getPointerTy()); +} + SDOperand X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { const char *Sym = cast(Op)->getSymbol(); @@ -3050,7 +3281,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { if (X86ScalarSSE) Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); else - Tys = DAG.getVTList(MVT::f64, MVT::Other); + Tys = DAG.getVTList(Op.getValueType(), MVT::Other); SmallVector Ops; Ops.push_back(Chain); Ops.push_back(StackSlot); @@ -3105,7 +3336,7 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { if (X86ScalarSSE) { assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); + SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDOperand Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) }; @@ -3125,50 +3356,62 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); - const Type *OpNTy = MVT::getTypeForValueType(VT); + MVT::ValueType EltVT = VT; + if (MVT::isVector(VT)) + EltVT = MVT::getVectorElementType(VT); + const Type *OpNTy = MVT::getTypeForValueType(EltVT); std::vector CV; - if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); + if (EltVT == MVT::f64) { + Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))); + CV.push_back(C); + CV.push_back(C); } else { - CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } - Constant *CS = ConstantStruct::get(CV); - SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SmallVector Ops; - Ops.push_back(DAG.getEntryNode()); - Ops.push_back(CPIdx); - Ops.push_back(DAG.getSrcValue(NULL)); - SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); + Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))); + CV.push_back(C); + CV.push_back(C); + CV.push_back(C); + CV.push_back(C); + } + Constant *C = ConstantVector::get(CV); + SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); + SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, + false, 16); return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); } SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); - const Type *OpNTy = MVT::getTypeForValueType(VT); + MVT::ValueType EltVT = VT; + unsigned 
EltNum = 1; + if (MVT::isVector(VT)) { + EltVT = MVT::getVectorElementType(VT); + EltNum = MVT::getVectorNumElements(VT); + } + const Type *OpNTy = MVT::getTypeForValueType(EltVT); std::vector CV; - if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); + if (EltVT == MVT::f64) { + Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)); + CV.push_back(C); + CV.push_back(C); } else { - CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } - Constant *CS = ConstantStruct::get(CV); - SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SmallVector Ops; - Ops.push_back(DAG.getEntryNode()); - Ops.push_back(CPIdx); - Ops.push_back(DAG.getSrcValue(NULL)); - SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); - return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); + Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31)); + CV.push_back(C); + CV.push_back(C); + CV.push_back(C); + CV.push_back(C); + } + Constant *C = ConstantVector::get(CV); + SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); + SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, + false, 16); + if (MVT::isVector(VT)) { + return DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::XOR, MVT::v2i64, + DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), + DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); + } else { + return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); + } } SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { @@ -3195,14 +3438,10 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { CV.push_back(ConstantFP::get(SrcTy, 0.0)); CV.push_back(ConstantFP::get(SrcTy, 0.0)); } - Constant *CS = ConstantStruct::get(CV); - SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other); - SmallVector Ops; - Ops.push_back(DAG.getEntryNode()); - Ops.push_back(CPIdx); - Ops.push_back(DAG.getSrcValue(NULL)); - SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); + Constant *C = ConstantVector::get(CV); + SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); + SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, + false, 16); SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -3227,14 +3466,10 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { CV.push_back(ConstantFP::get(SrcTy, 0.0)); CV.push_back(ConstantFP::get(SrcTy, 0.0)); } - CS = ConstantStruct::get(CV); - CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - Tys = DAG.getVTList(VT, MVT::Other); - Ops.clear(); - Ops.push_back(DAG.getEntryNode()); - Ops.push_back(CPIdx); - Ops.push_back(DAG.getSrcValue(NULL)); - SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); + C = ConstantVector::get(CV); + CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); + SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, + false, 16); SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); // Or the value with the sign bit. 
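
Note on the FABS/FNEG/FCOPYSIGN hunks above: the lowering loads a sign-bit mask from the constant pool and applies FAND or FXOR to it. A minimal scalar sketch of the same bit trick, for illustration only (this code is not part of the patch):

    #include <cstdint>
    #include <cstring>

    // fneg: flip the sign bit, which is what FXOR with (1ULL << 63) does.
    static double fneg_by_mask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits ^= 1ULL << 63;
      std::memcpy(&x, &bits, sizeof bits);
      return x;
    }

    // fabs: clear the sign bit, which is what FAND with ~(1ULL << 63) does.
    static double fabs_by_mask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits &= ~(1ULL << 63);
      std::memcpy(&x, &bits, sizeof bits);
      return x;
    }

For vector types the patch builds the mask with ConstantVector::get instead of ConstantStruct::get and loads it with 16-byte alignment, so the AND/XOR can be done directly on the SSE value (the FNEG path bitcasts to v2i64 and uses a plain XOR).
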
@@ -3395,6 +3630,48 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { } } + +// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. +// Calls to _alloca is needed to probe the stack when allocating more than 4k +// bytes in one go. Touching the stack at 4K increments is necessary to ensure +// that the guard pages used by the OS virtual memory manager are allocated in +// correct sequence. +SDOperand +X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, + SelectionDAG &DAG) { + assert(Subtarget->isTargetCygMing() && + "This should be used only on Cygwin/Mingw targets"); + + // Get the inputs. + SDOperand Chain = Op.getOperand(0); + SDOperand Size = Op.getOperand(1); + // FIXME: Ensure alignment here + + SDOperand Flag; + + MVT::ValueType IntPtr = getPointerTy(); + MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); + + Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDOperand Ops[] = { Chain, + DAG.getTargetExternalSymbol("_alloca", IntPtr), + DAG.getRegister(X86::EAX, IntPtr), + Flag }; + Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); + Flag = Chain.getValue(1); + + Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); + + std::vector Tys; + Tys.push_back(SPTy); + Tys.push_back(MVT::Other); + SDOperand Ops1[2] = { Chain.getValue(0), Chain }; + return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); +} + SDOperand X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); @@ -3402,7 +3679,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { if (Fn->hasExternalLinkage() && Subtarget->isTargetCygMing() && Fn->getName() == "main") - MF.getInfo()->setForceFramePointer(true); + MF.getInfo()->setForceFramePointer(true); unsigned CC = cast(Op.getOperand(1))->getValue(); if (Subtarget->is64Bit()) @@ -3418,10 +3695,10 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { case CallingConv::C: return LowerCCCArguments(Op, DAG); case CallingConv::X86_StdCall: - MF.getInfo()->setDecorationStyle(StdCall); + MF.getInfo()->setDecorationStyle(StdCall); return LowerCCCArguments(Op, DAG, true); case CallingConv::X86_FastCall: - MF.getInfo()->setDecorationStyle(FastCall); + MF.getInfo()->setDecorationStyle(FastCall); return LowerFastCCArguments(Op, DAG); } } @@ -3960,6 +4237,122 @@ SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { DAG.getConstant(4, getPointerTy())); } +SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, + SelectionDAG &DAG) { + // Is not yet supported on x86-64 + if (Subtarget->is64Bit()) + return SDOperand(); + + return DAG.getConstant(8, getPointerTy()); +} + +SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) +{ + assert(!Subtarget->is64Bit() && + "Lowering of eh_return builtin is not supported yet on x86-64"); + + MachineFunction &MF = DAG.getMachineFunction(); + SDOperand Chain = Op.getOperand(0); + SDOperand Offset = Op.getOperand(1); + SDOperand Handler = Op.getOperand(2); + + SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), + getPointerTy()); + + SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, + DAG.getConstant(-4UL, getPointerTy())); + StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); + Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); + Chain = 
DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); + MF.addLiveOut(X86::ECX); + + return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, + Chain, DAG.getRegister(X86::ECX, getPointerTy())); +} + +SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, + SelectionDAG &DAG) { + SDOperand Root = Op.getOperand(0); + SDOperand Trmp = Op.getOperand(1); // trampoline + SDOperand FPtr = Op.getOperand(2); // nested function + SDOperand Nest = Op.getOperand(3); // 'nest' parameter value + + SrcValueSDNode *TrmpSV = cast(Op.getOperand(4)); + + if (Subtarget->is64Bit()) { + return SDOperand(); // not yet supported + } else { + Function *Func = (Function *) + cast(cast(Op.getOperand(5))->getValue()); + unsigned CC = Func->getCallingConv(); + unsigned char NestReg; + + switch (CC) { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::X86_StdCall: { + // Pass 'nest' parameter in ECX. + // Must be kept in sync with X86CallingConv.td + NestReg = N86::ECX; + + // Check that ECX wasn't needed by an 'inreg' parameter. + const FunctionType *FTy = Func->getFunctionType(); + const ParamAttrsList *Attrs = FTy->getParamAttrs(); + + if (Attrs && !Func->isVarArg()) { + unsigned InRegCount = 0; + unsigned Idx = 1; + + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I, ++Idx) + if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) + // FIXME: should only count parameters that are lowered to integers. + InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; + + if (InRegCount > 2) { + cerr << "Nest register in use - reduce number of inreg parameters!\n"; + abort(); + } + } + break; + } + case CallingConv::X86_FastCall: + // Pass 'nest' parameter in EAX. + // Must be kept in sync with X86CallingConv.td + NestReg = N86::EAX; + break; + } + + SDOperand OutChains[4]; + SDOperand Addr, Disp; + + Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); + Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); + + const unsigned char MOV32ri = 0xB8; + const unsigned char JMP = 0xE9; + + OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|NestReg, MVT::i8), + Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); + + Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); + OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), + TrmpSV->getOffset() + 1, false, 1); + + Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); + OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, + TrmpSV->getValue() + 5, TrmpSV->getOffset()); + + Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); + OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), + TrmpSV->getOffset() + 6, false, 1); + + return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4); + } +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -3972,6 +4365,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::SHL_PARTS: case ISD::SRA_PARTS: @@ -3996,6 +4390,11 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::FRAME_TO_ARGS_OFFSET: + return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); } return SDOperand(); } @@ -4030,8 +4429,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; - case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; - case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::S2VEC: return "X86ISD::S2VEC"; @@ -4039,59 +4436,62 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; + case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; + case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; } } -/// isLegalAddressImmediate - Return true if the integer value can be used -/// as the offset of the target addressing mode for load / store of the -/// given type. -bool X86TargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ - // X86 allows a sign-extended 32-bit immediate field. - return (V > -(1LL << 32) && V < (1LL << 32)-1); -} - -/// isLegalAddressImmediate - Return true if the GlobalValue can be used as -/// the offset of the target addressing mode. -bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { - // In 64-bit mode, GV is 64-bit so it won't fit in the 32-bit displacement - // field unless we are in small code model. - if (Subtarget->is64Bit() && - getTargetMachine().getCodeModel() != CodeModel::Small) +// isLegalAddressingMode - Return true if the addressing mode represented +// by AM is legal for this target, for a load/store of the specified type. +bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // X86 supports extremely general addressing modes. + + // X86 allows a sign-extended 32-bit immediate field as a displacement. + if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) return false; - return (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)); -} + if (AM.BaseGV) { + // We can only fold this if we don't need an extra load. 
+ if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) + return false; -/// isLegalAddressScale - Return true if the integer value can be used as the -/// scale of the target addressing mode for load / store of the given type. -bool X86TargetLowering::isLegalAddressScale(int64_t S, const Type *Ty) const { - switch (S) { - default: + // X86-64 only supports addr of globals in small code model. + if (Subtarget->is64Bit()) { + if (getTargetMachine().getCodeModel() != CodeModel::Small) + return false; + // If lower 4G is not available, then we must use rip-relative addressing. + if (AM.BaseOffs || AM.Scale > 1) + return false; + } + } + + switch (AM.Scale) { + case 0: + case 1: + case 2: + case 4: + case 8: + // These scales always work. + break; + case 3: + case 5: + case 9: + // These scales are formed with basereg+scalereg. Only accept if there is + // no basereg yet. + if (AM.HasBaseReg) + return false; + break; + default: // Other stuff never works. return false; - case 2: case 4: case 8: - return true; - // FIXME: These require both scale + index last and thus more expensive. - // How to tell LSR to try for 2, 4, 8 first? - case 3: case 5: case 9: - return true; } + + return true; } -/// isLegalAddressScaleAndImm - Return true if S works for IsLegalAddressScale -/// and V works for isLegalAddressImmediate _and_ both can be applied -/// simultaneously to the same instruction. -bool X86TargetLowering::isLegalAddressScaleAndImm(int64_t S, int64_t V, - const Type* Ty) const { - return isLegalAddressScale(S, Ty) && isLegalAddressImmediate(V, Ty); -} - -/// isLegalAddressScaleAndImm - Return true if S works for IsLegalAddressScale -/// and GV works for isLegalAddressImmediate _and_ both can be applied -/// simultaneously to the same instruction. -bool X86TargetLowering::isLegalAddressScaleAndImm(int64_t S, GlobalValue *GV, - const Type* Ty) const { - return isLegalAddressScale(S, Ty) && isLegalAddressImmediate(GV); -} /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. @@ -4102,11 +4502,14 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { // Only do shuffles on 128-bit vector types for now. if (MVT::getSizeInBits(VT) == 64) return false; return (Mask.Val->getNumOperands() <= 4 || + isIdentityMask(Mask.Val) || + isIdentityMask(Mask.Val, true) || isSplatMask(Mask.Val) || isPSHUFHW_PSHUFLWMask(Mask.Val) || X86::isUNPCKLMask(Mask.Val) || + X86::isUNPCKHMask(Mask.Val) || X86::isUNPCKL_v_undef_Mask(Mask.Val) || - X86::isUNPCKHMask(Mask.Val)); + X86::isUNPCKH_v_undef_Mask(Mask.Val)); } bool X86TargetLowering::isVectorClearMaskLegal(std::vector &BVOps, @@ -4195,9 +4598,15 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, return BB; } - case X86::FP_TO_INT16_IN_MEM: - case X86::FP_TO_INT32_IN_MEM: - case X86::FP_TO_INT64_IN_MEM: { + case X86::FP32_TO_INT16_IN_MEM: + case X86::FP32_TO_INT32_IN_MEM: + case X86::FP32_TO_INT64_IN_MEM: + case X86::FP64_TO_INT16_IN_MEM: + case X86::FP64_TO_INT32_IN_MEM: + case X86::FP64_TO_INT64_IN_MEM: + case X86::FP80_TO_INT16_IN_MEM: + case X86::FP80_TO_INT32_IN_MEM: + case X86::FP80_TO_INT64_IN_MEM: { // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. 
     MachineFunction *F = BB->getParent();
@@ -4224,9 +4633,15 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
     unsigned Opc;
     switch (MI->getOpcode()) {
     default: assert(0 && "illegal opcode!");
-    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
-    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
-    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
+    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
     }
 
     X86AddressMode AM;
@@ -4270,6 +4685,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                        uint64_t Mask,
                                                        uint64_t &KnownZero,
                                                        uint64_t &KnownOne,
+                                                       const SelectionDAG &DAG,
                                                        unsigned Depth) const {
   unsigned Opc = Op.getOpcode();
   assert((Opc >= ISD::BUILTIN_OP_END ||
@@ -4298,11 +4714,11 @@ static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
     i %= NumElems;
     if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
       return (i == 0)
-       ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
+       ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
     } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
       SDOperand Idx = PermMask.getOperand(i);
       if (Idx.getOpcode() == ISD::UNDEF)
-        return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
+        return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
       return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
     }
     return SDOperand();
@@ -4349,8 +4765,8 @@ static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
   if (Loc.getOpcode() == ISD::FrameIndex) {
     if (BaseLoc.getOpcode() != ISD::FrameIndex)
       return false;
-    int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex();
-    int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
     int FS = MFI->getObjectSize(FI);
     int BFS = MFI->getObjectSize(BFI);
     if (FS != BFS || FS != Size) return false;
@@ -4377,7 +4793,7 @@ static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
     return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
   else {
     assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
-    int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
     if (BFI < 0)
       // Fixed objects do not specify alignment, however the offsets are known.
       return ((Subtarget->getStackAlignment() % 16) == 0 &&
@@ -4398,7 +4814,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MVT::ValueType VT = N->getValueType(0);
-  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+  MVT::ValueType EVT = MVT::getVectorElementType(VT);
   SDOperand PermMask = N->getOperand(2);
   int NumElems = (int)PermMask.getNumOperands();
   SDNode *Base = NULL;
@@ -4420,19 +4836,14 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   }
 
   bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
+  LoadSDNode *LD = cast<LoadSDNode>(Base);
   if (isAlign16) {
-    LoadSDNode *LD = cast<LoadSDNode>(Base);
     return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
-                       LD->getSrcValueOffset());
+                       LD->getSrcValueOffset(), LD->isVolatile());
   } else {
-    // Just use movups, it's shorter.
-    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
-    SmallVector<SDOperand, 3> Ops;
-    Ops.push_back(Base->getOperand(0));
-    Ops.push_back(Base->getOperand(1));
-    Ops.push_back(Base->getOperand(2));
-    return DAG.getNode(ISD::BIT_CONVERT, VT,
-                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
+    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
+                       LD->getSrcValueOffset(), LD->isVolatile(),
+                       LD->getAlignment());
   }
 }
 
@@ -4559,76 +4970,73 @@ isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
   case 'I':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
       if (C->getValue() <= 31)
-        return Op;
+        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
     }
     return SDOperand(0,0);
   case 'N':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
       if (C->getValue() <= 255)
-        return Op;
+        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
     }
     return SDOperand(0,0);
-  case 'i':
+  case 'i': {
     // Literal immediates are always ok.
-    if (isa<ConstantSDNode>(Op)) return Op;
+    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
+      return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
 
-    // If we are in non-pic codegen mode, we allow the address of a global to
-    // be used with 'i'.
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
-      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+    // If we are in non-pic codegen mode, we allow the address of a global (with
+    // an optional displacement) to be used with 'i'.
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+    int64_t Offset = 0;
+
+    // Match either (GA) or (GA+C)
+    if (GA) {
+      Offset = GA->getOffset();
+    } else if (Op.getOpcode() == ISD::ADD) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C && GA) {
+        Offset = GA->getOffset()+C->getValue();
+      } else {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+        if (C && GA)
+          Offset = GA->getOffset()+C->getValue();
+        else
+          C = 0, GA = 0;
+      }
+    }
+
+    if (GA) {
+      // If addressing this global requires a load (e.g. in PIC mode), we can't
+      // match.
+      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
+                                         false))
         return SDOperand(0, 0);
-      if (GA->getOpcode() != ISD::TargetGlobalAddress)
-        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
-                                        GA->getOffset());
+      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+                                      Offset);
       return Op;
     }
 
    // Otherwise, not valid for this mode.
     return SDOperand(0, 0);
   }
+  }
 
   return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
 }
-
 std::vector<unsigned> X86TargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                   MVT::ValueType VT) const {
   if (Constraint.size() == 1) {
     // FIXME: not handling fp-stack yet!
-    // FIXME: not handling MMX registers yet ('y' constraint).
     switch (Constraint[0]) {      // GCC X86 Constraint Letters
     default: break;  // Unknown constraint letter
     case 'A':   // EAX/EDX
       if (VT == MVT::i32 || VT == MVT::i64)
         return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
       break;
-    case 'r':   // GENERAL_REGS
-    case 'R':   // LEGACY_REGS
-      if (VT == MVT::i64 && Subtarget->is64Bit())
-        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
-                                     X86::RSI, X86::RDI, X86::RBP, X86::RSP,
-                                     X86::R8,  X86::R9,  X86::R10, X86::R11,
-                                     X86::R12, X86::R13, X86::R14, X86::R15, 0);
-      if (VT == MVT::i32)
-        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
-                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
-      else if (VT == MVT::i16)
-        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
-                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
-      else if (VT == MVT::i8)
-        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
-      break;
-    case 'l':   // INDEX_REGS
-      if (VT == MVT::i32)
-        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
-                                     X86::ESI, X86::EDI, X86::EBP, 0);
-      else if (VT == MVT::i16)
-        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
-                                     X86::SI, X86::DI, X86::BP, 0);
-      else if (VT == MVT::i8)
-        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0);
-      break;
     case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
     case 'Q':   // Q_REGS
       if (VT == MVT::i32)
@@ -4638,18 +5046,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
       else if (VT == MVT::i16)
         return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
       else if (VT == MVT::i8)
         return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0);
       break;
-    case 'x':   // SSE_REGS if SSE1 allowed
-      if (Subtarget->hasSSE1())
-        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
-                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
-                                     0);
-      return std::vector<unsigned>();
-    case 'Y':   // SSE_REGS if SSE2 allowed
-      if (Subtarget->hasSSE2())
-        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
-                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
-                                     0);
-      return std::vector<unsigned>();
     }
   }
 
@@ -4659,6 +5055,56 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
 std::pair<unsigned, const TargetRegisterClass*>
 X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 MVT::ValueType VT) const {
+  // First, see if this is a constraint that directly corresponds to an LLVM
+  // register class.
+  if (Constraint.size() == 1) {
+    // GCC Constraint Letters
+    switch (Constraint[0]) {
+    default: break;
+    case 'r':   // GENERAL_REGS
+    case 'R':   // LEGACY_REGS
+    case 'l':   // INDEX_REGS
+      if (VT == MVT::i64 && Subtarget->is64Bit())
+        return std::make_pair(0U, X86::GR64RegisterClass);
+      if (VT == MVT::i32)
+        return std::make_pair(0U, X86::GR32RegisterClass);
+      else if (VT == MVT::i16)
+        return std::make_pair(0U, X86::GR16RegisterClass);
+      else if (VT == MVT::i8)
+        return std::make_pair(0U, X86::GR8RegisterClass);
+      break;
+    case 'y':   // MMX_REGS if MMX allowed.
+      if (!Subtarget->hasMMX()) break;
+      return std::make_pair(0U, X86::VR64RegisterClass);
+      break;
+    case 'Y':   // SSE_REGS if SSE2 allowed
+      if (!Subtarget->hasSSE2()) break;
+      // FALL THROUGH.
+    case 'x':   // SSE_REGS if SSE1 allowed
+      if (!Subtarget->hasSSE1()) break;
+
+      switch (VT) {
+      default: break;
+      // Scalar SSE types.
+      case MVT::f32:
+      case MVT::i32:
+        return std::make_pair(0U, X86::FR32RegisterClass);
+      case MVT::f64:
+      case MVT::i64:
+        return std::make_pair(0U, X86::FR64RegisterClass);
+      // Vector types.
+      case MVT::v16i8:
+      case MVT::v8i16:
+      case MVT::v4i32:
+      case MVT::v2i64:
+      case MVT::v4f32:
+      case MVT::v2f64:
+        return std::make_pair(0U, X86::VR128RegisterClass);
+      }
+      break;
+    }
+  }
+
   // Use the default implementation in TargetLowering to convert the register
   // constraint into a member of a register class.
   std::pair<unsigned, const TargetRegisterClass*> Res;