X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=8214e2ef22697733b88203b2d8bdce2d51c8c84f;hb=eda65fa20ba99e520d7f171d5b536458eea6e9c7;hp=b59dad3a673bd0dbe3f39879ccb59b6a4e83e1ec;hpb=3c992d291bc67d9ce9d742d586d24ade9a577c99;p=oota-llvm.git

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b59dad3a673..8214e2ef226 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19,6 +19,9 @@
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,7 +29,6 @@
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
 using namespace llvm;
 
 // FIXME: temporary.
@@ -48,6 +50,20 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   setSchedulingPreference(SchedulingForRegPressure);
   setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
   setStackPointerRegisterToSaveRestore(X86::ESP);
+
+  if (!Subtarget->isTargetDarwin())
+    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+    setUseUnderscoreSetJmpLongJmp(true);
+
+  // Add legal addressing mode scale values.
+  addLegalAddressScale(8);
+  addLegalAddressScale(4);
+  addLegalAddressScale(2);
+  // Enter the ones which require both scale + index last. These are more
+  // expensive.
+  addLegalAddressScale(9);
+  addLegalAddressScale(5);
+  addLegalAddressScale(3);
 
   // Set up the register classes.
   addRegisterClass(MVT::i8, X86::R8RegisterClass);
@@ -114,8 +130,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
 
   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
-  setOperationAction(ISD::BRCONDTWOWAY     , MVT::Other, Expand);
-  setOperationAction(ISD::BRTWOWAY_CC      , MVT::Other, Expand);
   setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
   setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
   setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
@@ -155,6 +169,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   setOperationAction(ISD::RET              , MVT::Other, Custom);
   // Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); + setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) @@ -169,7 +184,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::LOCATION, MVT::Other, Expand); setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); // FIXME - use subtarget debug flags - if (!TM.getSubtarget().isTargetDarwin()) + if (!Subtarget->isTargetDarwin()) setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); // VASTART needs to be custom lowered to use the VarArgsFrameIndex @@ -242,48 +257,100 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); } - if (TM.getSubtarget().hasMMX()) { + if (Subtarget->hasMMX()) { addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); // FIXME: add MMX packed arithmetics - setOperationAction(ISD::ConstantVec, MVT::v8i8, Expand); - setOperationAction(ISD::ConstantVec, MVT::v4i16, Expand); - setOperationAction(ISD::ConstantVec, MVT::v2i32, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); } - if (TM.getSubtarget().hasSSE1()) { + if (Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); - setOperationAction(ISD::ADD , MVT::v4f32, Legal); - setOperationAction(ISD::SUB , MVT::v4f32, Legal); - setOperationAction(ISD::MUL , MVT::v4f32, Legal); - setOperationAction(ISD::LOAD , MVT::v4f32, Legal); - setOperationAction(ISD::ConstantVec, MVT::v4f32, Expand); + setOperationAction(ISD::AND, MVT::v4f32, Legal); + setOperationAction(ISD::OR, MVT::v4f32, Legal); + setOperationAction(ISD::XOR, MVT::v4f32, Legal); + setOperationAction(ISD::ADD, MVT::v4f32, Legal); + setOperationAction(ISD::SUB, MVT::v4f32, Legal); + setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::LOAD, MVT::v4f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::SELECT, MVT::v4f32, Custom); } - if (TM.getSubtarget().hasSSE2()) { + if (Subtarget->hasSSE2()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); + setOperationAction(ISD::ADD, MVT::v2f64, Legal); + setOperationAction(ISD::ADD, MVT::v16i8, Legal); + setOperationAction(ISD::ADD, MVT::v8i16, Legal); + setOperationAction(ISD::ADD, MVT::v4i32, Legal); + setOperationAction(ISD::SUB, MVT::v2f64, Legal); + setOperationAction(ISD::SUB, MVT::v16i8, Legal); + setOperationAction(ISD::SUB, MVT::v8i16, Legal); + setOperationAction(ISD::SUB, 
MVT::v4i32, Legal); + setOperationAction(ISD::MUL, MVT::v8i16, Legal); + setOperationAction(ISD::MUL, MVT::v2f64, Legal); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. + for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { + setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); + } + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); + + // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. + for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { + setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); + AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); + setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); + AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); + setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); + AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); + setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); + AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); + setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); + AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); + } - setOperationAction(ISD::ADD , MVT::v2f64, Legal); - setOperationAction(ISD::SUB , MVT::v2f64, Legal); - setOperationAction(ISD::MUL , MVT::v2f64, Legal); - setOperationAction(ISD::LOAD , MVT::v2f64, Legal); - setOperationAction(ISD::ConstantVec, MVT::v2f64, Expand); - setOperationAction(ISD::ConstantVec, MVT::v16i8, Expand); - setOperationAction(ISD::ConstantVec, MVT::v8i16, Expand); - setOperationAction(ISD::ConstantVec, MVT::v4i32, Expand); - setOperationAction(ISD::ConstantVec, MVT::v2i64, Expand); + // Custom lower v2i64 and v2f64 selects. + setOperationAction(ISD::LOAD, MVT::v2f64, Legal); + setOperationAction(ISD::LOAD, MVT::v2i64, Legal); + setOperationAction(ISD::SELECT, MVT::v2f64, Custom); + setOperationAction(ISD::SELECT, MVT::v2i64, Custom); } + // We want to custom lower some of our intrinsics. + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + computeRegisterProperties(); // FIXME: These should be based on subtarget info. Plus, the values should @@ -296,9 +363,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) std::vector X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { + std::vector Args = TargetLowering::LowerArguments(F, DAG); + + FormalArgs.clear(); + FormalArgLocs.clear(); + + // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be set + // before the rest of the function can be lowered. 
if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) - return LowerFastCCArguments(F, DAG); - return LowerCCCArguments(F, DAG); + PreprocessFastCCArguments(Args, F, DAG); + else + PreprocessCCCArguments(Args, F, DAG); + return Args; } std::pair @@ -326,10 +402,43 @@ X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, // C Calling Convention implementation //===----------------------------------------------------------------------===// -std::vector -X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { - std::vector ArgValues; +static unsigned getFormalArgSize(MVT::ValueType ObjectVT) { + unsigned ObjSize = 0; + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i1: + case MVT::i8: ObjSize = 1; break; + case MVT::i16: ObjSize = 2; break; + case MVT::i32: ObjSize = 4; break; + case MVT::i64: ObjSize = 8; break; + case MVT::f32: ObjSize = 4; break; + case MVT::f64: ObjSize = 8; break; + } + return ObjSize; +} + +static std::vector getFormalArgObjects(SDOperand Op) { + unsigned Opc = Op.getOpcode(); + std::vector Objs; + if (Opc == ISD::TRUNCATE) { + Op = Op.getOperand(0); + assert(Op.getOpcode() == ISD::AssertSext || + Op.getOpcode() == ISD::AssertZext); + Objs.push_back(Op.getOperand(0)); + } else if (Opc == ISD::FP_ROUND) { + Objs.push_back(Op.getOperand(0)); + } else if (Opc == ISD::BUILD_PAIR) { + Objs.push_back(Op.getOperand(0)); + Objs.push_back(Op.getOperand(1)); + } else { + Objs.push_back(Op); + } + return Objs; +} +void X86TargetLowering::PreprocessCCCArguments(std::vectorArgs, + Function &F, SelectionDAG &DAG) { + unsigned NumArgs = Args.size(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -342,41 +451,25 @@ X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { // ... // unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - MVT::ValueType ObjectVT = getValueType(I->getType()); - unsigned ArgIncrement = 4; - unsigned ObjSize; - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i1: - case MVT::i8: ObjSize = 1; break; - case MVT::i16: ObjSize = 2; break; - case MVT::i32: ObjSize = 4; break; - case MVT::i64: ObjSize = ArgIncrement = 8; break; - case MVT::f32: ObjSize = 4; break; - case MVT::f64: ObjSize = ArgIncrement = 8; break; - } - // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - - // Create the SelectionDAG nodes corresponding to a load from this parameter - SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); + for (unsigned i = 0; i < NumArgs; ++i) { + SDOperand Op = Args[i]; + std::vector Objs = getFormalArgObjects(Op); + for (std::vector::iterator I = Objs.begin(), E = Objs.end(); + I != E; ++I) { + SDOperand Obj = *I; + MVT::ValueType ObjectVT = Obj.getValueType(); + unsigned ArgIncrement = 4; + unsigned ObjSize = getFormalArgSize(ObjectVT); + if (ObjSize == 8) + ArgIncrement = 8; - // Don't codegen dead arguments. FIXME: remove this check when we can nuke - // dead loads. - SDOperand ArgValue; - if (!I->use_empty()) - ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, - DAG.getSrcValue(NULL)); - else { - if (MVT::isInteger(ObjectVT)) - ArgValue = DAG.getConstant(0, ObjectVT); - else - ArgValue = DAG.getConstantFP(0, ObjectVT); + // Create the frame index object for this incoming parameter... 
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + std::pair Loc = + std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo()); + FormalArgLocs.push_back(Loc); + ArgOffset += ArgIncrement; // Move on to the next argument... } - ArgValues.push_back(ArgValue); - - ArgOffset += ArgIncrement; // Move on to the next argument... } // If the function takes variable number of arguments, make a frame index for @@ -386,27 +479,21 @@ X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { ReturnAddrIndex = 0; // No return address slot generated yet. BytesToPopOnReturn = 0; // Callee pops nothing. BytesCallerReserves = ArgOffset; +} - // Finally, inform the code generator which regs we return values in. - switch (getValueType(F.getReturnType())) { - default: assert(0 && "Unknown type!"); - case MVT::isVoid: break; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - MF.addLiveOut(X86::EAX); - break; - case MVT::i64: - MF.addLiveOut(X86::EAX); - MF.addLiveOut(X86::EDX); - break; - case MVT::f32: - case MVT::f64: - MF.addLiveOut(X86::ST0); - break; +void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { + unsigned NumArgs = Op.Val->getNumValues(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + for (unsigned i = 0; i < NumArgs; ++i) { + // Create the SelectionDAG nodes corresponding to a load from this parameter + unsigned FI = FormalArgLocs[i].first.Loc; + SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); + SDOperand ArgValue = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), + FIN, DAG.getSrcValue(NULL)); + FormalArgs.push_back(ArgValue); } - return ArgValues; } std::pair @@ -630,11 +717,74 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, return VReg; } +// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments +// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and +// EDX". Anything more is illegal. +// +// FIXME: The linscan register allocator currently has problem with +// coalescing. At the time of this writing, whenever it decides to coalesce +// a physreg with a virtreg, this increases the size of the physreg's live +// range, and the live range cannot ever be reduced. This causes problems if +// too many physregs are coaleced with virtregs, which can cause the register +// allocator to wedge itself. +// +// This code triggers this problem more often if we pass args in registers, +// so disable it until this is fixed. +// +// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings +// about code being dead. 
+// +static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; + -std::vector -X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { - std::vector ArgValues; +static void +DetermineFastCCFormalArgSizeNumRegs(MVT::ValueType ObjectVT, + unsigned &ObjSize, unsigned &NumIntRegs) { + ObjSize = 0; + NumIntRegs = 0; + + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i1: + case MVT::i8: + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) + NumIntRegs = 1; + else + ObjSize = 1; + break; + case MVT::i16: + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) + NumIntRegs = 1; + else + ObjSize = 2; + break; + case MVT::i32: + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) + NumIntRegs = 1; + else + ObjSize = 4; + break; + case MVT::i64: + if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { + NumIntRegs = 2; + } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { + NumIntRegs = 1; + ObjSize = 4; + } else + ObjSize = 8; + case MVT::f32: + ObjSize = 4; + break; + case MVT::f64: + ObjSize = 8; + break; + } +} +void +X86TargetLowering::PreprocessFastCCArguments(std::vectorArgs, + Function &F, SelectionDAG &DAG) { + unsigned NumArgs = Args.size(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -651,119 +801,79 @@ X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both // used). unsigned NumIntRegs = 0; - - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - MVT::ValueType ObjectVT = getValueType(I->getType()); - unsigned ArgIncrement = 4; - unsigned ObjSize = 0; - SDOperand ArgValue; - - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i1: - case MVT::i8: - if (NumIntRegs < 2) { - if (!I->use_empty()) { - unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, - X86::R8RegisterClass); - ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8); - DAG.setRoot(ArgValue.getValue(1)); - if (ObjectVT == MVT::i1) - // FIXME: Should insert a assertzext here. - ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue); - } - ++NumIntRegs; - break; - } - - ObjSize = 1; - break; - case MVT::i16: - if (NumIntRegs < 2) { - if (!I->use_empty()) { - unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, - X86::R16RegisterClass); - ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16); - DAG.setRoot(ArgValue.getValue(1)); - } - ++NumIntRegs; - break; - } - ObjSize = 2; - break; - case MVT::i32: - if (NumIntRegs < 2) { - if (!I->use_empty()) { - unsigned VReg = AddLiveIn(MF,NumIntRegs ? X86::EDX : X86::EAX, - X86::R32RegisterClass); - ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); - DAG.setRoot(ArgValue.getValue(1)); - } - ++NumIntRegs; - break; + + for (unsigned i = 0; i < NumArgs; ++i) { + SDOperand Op = Args[i]; + std::vector Objs = getFormalArgObjects(Op); + for (std::vector::iterator I = Objs.begin(), E = Objs.end(); + I != E; ++I) { + SDOperand Obj = *I; + MVT::ValueType ObjectVT = Obj.getValueType(); + unsigned ArgIncrement = 4; + unsigned ObjSize = 0; + unsigned NumRegs = 0; + + DetermineFastCCFormalArgSizeNumRegs(ObjectVT, ObjSize, NumRegs); + if (ObjSize == 8) + ArgIncrement = 8; + + unsigned Reg; + std::pair Loc = std::make_pair(FALocInfo(), + FALocInfo()); + if (NumRegs) { + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i1: + case MVT::i8: + Reg = AddLiveIn(MF, NumIntRegs ? 
X86::DL : X86::AL, + X86::R8RegisterClass); + Loc.first.Kind = FALocInfo::LiveInRegLoc; + Loc.first.Loc = Reg; + Loc.first.Typ = MVT::i8; + break; + case MVT::i16: + Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, + X86::R16RegisterClass); + Loc.first.Kind = FALocInfo::LiveInRegLoc; + Loc.first.Loc = Reg; + Loc.first.Typ = MVT::i16; + break; + case MVT::i32: + Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, + X86::R32RegisterClass); + Loc.first.Kind = FALocInfo::LiveInRegLoc; + Loc.first.Loc = Reg; + Loc.first.Typ = MVT::i32; + break; + case MVT::i64: + Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, + X86::R32RegisterClass); + Loc.first.Kind = FALocInfo::LiveInRegLoc; + Loc.first.Loc = Reg; + Loc.first.Typ = MVT::i32; + if (NumRegs == 2) { + Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); + Loc.second.Kind = FALocInfo::LiveInRegLoc; + Loc.second.Loc = Reg; + Loc.second.Typ = MVT::i32; + } + break; + } } - ObjSize = 4; - break; - case MVT::i64: - if (NumIntRegs == 0) { - if (!I->use_empty()) { - unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass); - unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); - - SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); - SDOperand Hi = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32); - DAG.setRoot(Hi.getValue(1)); - - ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); - } - NumIntRegs = 2; - break; - } else if (NumIntRegs == 1) { - if (!I->use_empty()) { - unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); - SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); - DAG.setRoot(Low.getValue(1)); - - // Load the high part from memory. - // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(4, ArgOffset); - SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); - SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, - DAG.getSrcValue(NULL)); - ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); + if (ObjSize) { + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + if (ObjectVT == MVT::i64 && NumRegs) { + Loc.second.Kind = FALocInfo::StackFrameLoc; + Loc.second.Loc = FI; + } else { + Loc.first.Kind = FALocInfo::StackFrameLoc; + Loc.first.Loc = FI; } - ArgOffset += 4; - NumIntRegs = 2; - break; + ArgOffset += ArgIncrement; // Move on to the next argument. } - ObjSize = ArgIncrement = 8; - break; - case MVT::f32: ObjSize = 4; break; - case MVT::f64: ObjSize = ArgIncrement = 8; break; - } - // Don't codegen dead arguments. FIXME: remove this check when we can nuke - // dead loads. - if (ObjSize && !I->use_empty()) { - // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - - // Create the SelectionDAG nodes corresponding to a load from this - // parameter. - SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); - - ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, - DAG.getSrcValue(NULL)); - } else if (ArgValue.Val == 0) { - if (MVT::isInteger(ObjectVT)) - ArgValue = DAG.getConstant(0, ObjectVT); - else - ArgValue = DAG.getConstantFP(0, ObjectVT); + FormalArgLocs.push_back(Loc); } - ArgValues.push_back(ArgValue); - - if (ObjSize) - ArgOffset += ArgIncrement; // Move on to the next argument. 
} // Make sure the instruction takes 8n+4 bytes to make sure the start of the @@ -795,7 +905,43 @@ X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { MF.addLiveOut(X86::ST0); break; } - return ArgValues; +} +void +X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { + unsigned NumArgs = Op.Val->getNumValues(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + for (unsigned i = 0; i < NumArgs; ++i) { + MVT::ValueType VT = Op.Val->getValueType(i); + std::pair Loc = FormalArgLocs[i]; + SDOperand ArgValue; + if (Loc.first.Kind == FALocInfo::StackFrameLoc) { + // Create the SelectionDAG nodes corresponding to a load from this parameter + SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32); + ArgValue = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN, + DAG.getSrcValue(NULL)); + } else { + // Must be a CopyFromReg + ArgValue= DAG.getCopyFromReg(DAG.getRoot(), Loc.first.Loc, Loc.first.Typ); + } + + if (Loc.second.Kind != FALocInfo::None) { + SDOperand ArgValue2; + if (Loc.second.Kind == FALocInfo::StackFrameLoc) { + // Create the SelectionDAG nodes corresponding to a load from this parameter + SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32); + ArgValue2 = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN, + DAG.getSrcValue(NULL)); + } else { + // Must be a CopyFromReg + ArgValue2 = DAG.getCopyFromReg(DAG.getRoot(), + Loc.second.Loc, Loc.second.Typ); + } + ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2); + } + FormalArgs.push_back(ArgValue); + } } std::pair @@ -817,7 +963,7 @@ X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, case MVT::i8: case MVT::i16: case MVT::i32: - if (NumIntRegs < 2) { + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { ++NumIntRegs; break; } @@ -826,11 +972,11 @@ X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, NumBytes += 4; break; case MVT::i64: - if (NumIntRegs == 0) { - NumIntRegs = 2; + if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { + NumIntRegs += 2; break; - } else if (NumIntRegs == 1) { - NumIntRegs = 2; + } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { + NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; NumBytes += 4; break; } @@ -863,7 +1009,7 @@ X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, case MVT::i8: case MVT::i16: case MVT::i32: - if (NumIntRegs < 2) { + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { RegValuesToPass.push_back(Args[i].first); ++NumIntRegs; break; @@ -879,14 +1025,17 @@ X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, break; } case MVT::i64: - if (NumIntRegs < 2) { // Can pass part of it in regs? + // Can pass (at least) part of it in regs? + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Args[i].first, DAG.getConstant(1, MVT::i32)); SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Args[i].first, DAG.getConstant(0, MVT::i32)); RegValuesToPass.push_back(Lo); ++NumIntRegs; - if (NumIntRegs < 2) { // Pass both parts in regs? + + // Pass both parts in regs? + if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { RegValuesToPass.push_back(Hi); ++NumIntRegs; } else { @@ -1124,9 +1273,8 @@ static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { /// specific condition code. It returns a false if it cannot do a direct /// translation. X86CC is the translated CondCode. 
Flip is set to true if the /// the order of comparison operands should be flipped. -static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, - bool &Flip) { - ISD::CondCode SetCCOpcode = cast(CC)->get(); +static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, + unsigned &X86CC, bool &Flip) { Flip = false; X86CC = X86ISD::COND_INVALID; if (!isFP) { @@ -1154,16 +1302,16 @@ static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, default: break; case ISD::SETUEQ: case ISD::SETEQ: X86CC = X86ISD::COND_E; break; - case ISD::SETOLE: Flip = true; // Fallthrough + case ISD::SETOLT: Flip = true; // Fallthrough case ISD::SETOGT: case ISD::SETGT: X86CC = X86ISD::COND_A; break; - case ISD::SETOLT: Flip = true; // Fallthrough + case ISD::SETOLE: Flip = true; // Fallthrough case ISD::SETOGE: case ISD::SETGE: X86CC = X86ISD::COND_AE; break; - case ISD::SETUGE: Flip = true; // Fallthrough + case ISD::SETUGT: Flip = true; // Fallthrough case ISD::SETULT: case ISD::SETLT: X86CC = X86ISD::COND_B; break; - case ISD::SETUGT: Flip = true; // Fallthrough + case ISD::SETUGE: Flip = true; // Fallthrough case ISD::SETULE: case ISD::SETLE: X86CC = X86ISD::COND_BE; break; case ISD::SETONE: @@ -1176,6 +1324,11 @@ static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, return X86CC != X86ISD::COND_INVALID; } +static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, + bool &Flip) { + return translateX86CC(cast(CC)->get(), isFP, X86CC, Flip); +} + /// hasFPCMov - is there a floating point cmov for the specific X86 condition /// code. Current x86 isa includes the following FP cmov instructions: /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. @@ -1201,7 +1354,10 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); case X86::CMOV_FR32: - case X86::CMOV_FR64: { + case X86::CMOV_FR64: + case X86::CMOV_V4F32: + case X86::CMOV_V2F64: + case X86::CMOV_V2I64: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the @@ -1224,7 +1380,15 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, MachineFunction *F = BB->getParent(); F->getBasicBlockList().insert(It, copy0MBB); F->getBasicBlockList().insert(It, sinkMBB); - // Update machine-CFG edges + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -1317,262 +1481,1595 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, // X86 Custom Lowering Hooks //===----------------------------------------------------------------------===// -/// LowerOperation - Provide custom lowering hooks for some operations. 
-/// -SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { - switch (Op.getOpcode()) { - default: assert(0 && "Should not custom lower this!"); - case ISD::SHL_PARTS: - case ISD::SRA_PARTS: - case ISD::SRL_PARTS: { - assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && - "Not an i64 shift!"); - bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; - SDOperand ShOpLo = Op.getOperand(0); - SDOperand ShOpHi = Op.getOperand(1); - SDOperand ShAmt = Op.getOperand(2); - SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, - DAG.getConstant(31, MVT::i8)) - : DAG.getConstant(0, MVT::i32); - - SDOperand Tmp2, Tmp3; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); - Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); - } else { - Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); - Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); - } +/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra +/// load. For Darwin, external and weak symbols are indirect, loading the value +/// at address GV rather then the value of GV itself. This means that the +/// GlobalAddress must be in the base or index register of the address, not the +/// GV offset field. +static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { + return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || + (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); +} - SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, - ShAmt, DAG.getConstant(32, MVT::i8)); +/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value falls within the specified range (L, H]. +static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { + if (Op.getOpcode() == ISD::UNDEF) + return true; - SDOperand Hi, Lo; - SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); + unsigned Val = cast(Op)->getValue(); + return (Val >= Low && Val < Hi); +} - std::vector Tys; - Tys.push_back(MVT::i32); - Tys.push_back(MVT::Flag); - std::vector Ops; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Ops.push_back(Tmp2); - Ops.push_back(Tmp3); - Ops.push_back(CC); - Ops.push_back(InFlag); - Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); - InFlag = Hi.getValue(1); +/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value equal to the specified value. +static bool isUndefOrEqual(SDOperand Op, unsigned Val) { + if (Op.getOpcode() == ISD::UNDEF) + return true; + return cast(Op)->getValue() == Val; +} - Ops.clear(); - Ops.push_back(Tmp3); - Ops.push_back(Tmp1); - Ops.push_back(CC); - Ops.push_back(InFlag); - Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); - } else { - Ops.push_back(Tmp2); - Ops.push_back(Tmp3); - Ops.push_back(CC); - Ops.push_back(InFlag); - Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); - InFlag = Lo.getValue(1); +/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFD. 
+bool X86::isPSHUFDMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); - Ops.clear(); - Ops.push_back(Tmp3); - Ops.push_back(Tmp1); - Ops.push_back(CC); - Ops.push_back(InFlag); - Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); - } + if (N->getNumOperands() != 4) + return false; - Tys.clear(); - Tys.push_back(MVT::i32); - Tys.push_back(MVT::i32); - Ops.clear(); - Ops.push_back(Lo); - Ops.push_back(Hi); - return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); + // Check if the value doesn't reference the second vector. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast(Arg)->getValue() >= 4) + return false; } - case ISD::SINT_TO_FP: { - assert(Op.getOperand(0).getValueType() <= MVT::i64 && - Op.getOperand(0).getValueType() >= MVT::i16 && - "Unknown SINT_TO_FP to lower!"); - SDOperand Result; - MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); - unsigned Size = MVT::getSizeInBits(SrcVT)/8; - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); - SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, - DAG.getEntryNode(), Op.getOperand(0), - StackSlot, DAG.getSrcValue(NULL)); + return true; +} - // Build the FILD - std::vector Tys; - Tys.push_back(MVT::f64); - Tys.push_back(MVT::Other); - if (X86ScalarSSE) Tys.push_back(MVT::Flag); - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(StackSlot); - Ops.push_back(DAG.getValueType(SrcVT)); - Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, - Tys, Ops); - - if (X86ScalarSSE) { - Chain = Result.getValue(1); - SDOperand InFlag = Result.getValue(2); - - // FIXME: Currently the FST is flagged to the FILD_FLAG. This - // shouldn't be necessary except that RFP cannot be live across - // multiple blocks. When stackifier is fixed, they can be uncoupled. - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); - SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - std::vector Tys; - Tys.push_back(MVT::Other); - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(Result); - Ops.push_back(StackSlot); - Ops.push_back(DAG.getValueType(Op.getValueType())); - Ops.push_back(InFlag); - Chain = DAG.getNode(X86ISD::FST, Tys, Ops); - Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, - DAG.getSrcValue(NULL)); - } +/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFHW. +bool X86::isPSHUFHWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 8) + return false; - return Result; + // Lower quadword copied in order. + for (unsigned i = 0; i != 4; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast(Arg)->getValue() != i) + return false; } - case ISD::FP_TO_SINT: { - assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && - "Unknown FP_TO_SINT to lower!"); - // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary - // stack slot. 
- MachineFunction &MF = DAG.getMachineFunction(); - unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; - int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); - SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - unsigned Opc; - switch (Op.getValueType()) { - default: assert(0 && "Invalid FP_TO_SINT to lower!"); - case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; - case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; - case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; - } + // Upper quadword shuffled. + for (unsigned i = 4; i != 8; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val < 4 || Val > 7) + return false; + } - SDOperand Chain = DAG.getEntryNode(); - SDOperand Value = Op.getOperand(0); - if (X86ScalarSSE) { - assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); - Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, - DAG.getSrcValue(0)); - std::vector Tys; - Tys.push_back(MVT::f64); - Tys.push_back(MVT::Other); - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(StackSlot); - Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); - Value = DAG.getNode(X86ISD::FLD, Tys, Ops); - Chain = Value.getValue(1); - SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); - StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + return true; +} + +/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFLW. +bool X86::isPSHUFLWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 8) + return false; + + // Upper quadword copied in order. + for (unsigned i = 4; i != 8; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) + return false; + + // Lower quadword shuffled. + for (unsigned i = 0; i != 4; ++i) + if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + return false; + + return true; +} + +/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to SHUFP*. +static bool isSHUFPMask(std::vector &N) { + unsigned NumElems = N.size(); + if (NumElems != 2 && NumElems != 4) return false; + + unsigned Half = NumElems / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(N[i], 0, NumElems)) + return false; + for (unsigned i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) + return false; + + return true; +} + +bool X86::isSHUFPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return ::isSHUFPMask(Ops); +} + +/// isCommutedSHUFP - Returns true if the shuffle mask is except +/// the reverse of what x86 shuffles want. x86 shuffles requires the lower +/// half elements to come from vector 1 (which would equal the dest.) and +/// the upper half to come from vector 2. 
+static bool isCommutedSHUFP(std::vector &Ops) { + unsigned NumElems = Ops.size(); + if (NumElems != 2 && NumElems != 4) return false; + + unsigned Half = NumElems / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) + return false; + for (unsigned i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Ops[i], 0, NumElems)) + return false; + return true; +} + +static bool isCommutedSHUFP(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return isCommutedSHUFP(Ops); +} + +/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVHLPS. +bool X86::isMOVHLPSMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 + return isUndefOrEqual(N->getOperand(0), 6) && + isUndefOrEqual(N->getOperand(1), 7) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); +} + +/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. +bool X86::isMOVLPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 2 && NumElems != 4) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) + return false; + + for (unsigned i = NumElems/2; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) + return false; + + return true; +} + +/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} +/// and MOVLHPS. +bool X86::isMOVHPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 2 && NumElems != 4) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) { + SDOperand Arg = N->getOperand(i + NumElems/2); + if (!isUndefOrEqual(Arg, i + NumElems)) + return false; + } + + return true; +} + +/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to UNPCKL. +bool static isUNPCKLMask(std::vector &N, bool V2IsSplat = false) { + unsigned NumElems = N.size(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDOperand BitI = N[i]; + SDOperand BitI1 = N[i+1]; + if (!isUndefOrEqual(BitI, j)) + return false; + if (V2IsSplat) { + if (isUndefOrEqual(BitI1, NumElems)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElems)) + return false; + } + } + + return true; +} + +bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return ::isUNPCKLMask(Ops, V2IsSplat); +} + +/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to UNPCKH. 
+bool static isUNPCKHMask(std::vector &N, bool V2IsSplat = false) { + unsigned NumElems = N.size(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDOperand BitI = N[i]; + SDOperand BitI1 = N[i+1]; + if (!isUndefOrEqual(BitI, j + NumElems/2)) + return false; + if (V2IsSplat) { + if (isUndefOrEqual(BitI1, NumElems)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) + return false; + } + } + + return true; +} + +bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return ::isUNPCKHMask(Ops, V2IsSplat); +} + +/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form +/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, +/// <0, 0, 1, 1> +bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDOperand BitI = N->getOperand(i); + SDOperand BitI1 = N->getOperand(i+1); + + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } + + return true; +} + +/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVSS, +/// MOVSD, and MOVD, i.e. setting the lowest element. +static bool isMOVLMask(std::vector &N) { + unsigned NumElems = N.size(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + if (!isUndefOrEqual(N[0], NumElems)) + return false; + + for (unsigned i = 1; i < NumElems; ++i) { + SDOperand Arg = N[i]; + if (!isUndefOrEqual(Arg, i)) + return false; + } + + return true; +} + +bool X86::isMOVLMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return ::isMOVLMask(Ops); +} + +/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse +/// of what x86 movss want. X86 movs requires the lowest element to be lowest +/// element of vector 2 and the other elements to come from vector 1 in order. +static bool isCommutedMOVL(std::vector &Ops, bool V2IsSplat = false) { + unsigned NumElems = Ops.size(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + if (!isUndefOrEqual(Ops[0], 0)) + return false; + + for (unsigned i = 1; i < NumElems; ++i) { + SDOperand Arg = Ops[i]; + if (V2IsSplat) { + if (!isUndefOrEqual(Arg, NumElems)) + return false; + } else { + if (!isUndefOrEqual(Arg, i+NumElems)) + return false; + } + } + + return true; +} + +static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + std::vector Ops(N->op_begin(), N->op_end()); + return isCommutedMOVL(Ops, V2IsSplat); +} + +/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
+bool X86::isMOVSHDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect 1, 1, 3, 3 + for (unsigned i = 0; i < 2; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val != 1) return false; + } + + bool HasHi = false; + for (unsigned i = 2; i < 4; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val != 3) return false; + HasHi = true; + } + + // Don't use movshdup if it can be done with a shufps. + return HasHi; +} + +/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. +bool X86::isMOVSLDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect 0, 0, 2, 2 + for (unsigned i = 0; i < 2; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val != 0) return false; + } + + bool HasHi = false; + for (unsigned i = 2; i < 4; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val != 2) return false; + HasHi = true; + } + + // Don't use movshdup if it can be done with a shufps. + return HasHi; +} + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element. +static bool isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // This is a splat operation if each element of the permute is the same, and + // if the value doesn't reference the second vector. + unsigned NumElems = N->getNumOperands(); + SDOperand ElementBase; + unsigned i = 0; + for (; i != NumElems; ++i) { + SDOperand Elt = N->getOperand(i); + if (ConstantSDNode *EltV = dyn_cast(Elt)) { + ElementBase = Elt; + break; + } + } + + if (!ElementBase.Val) + return false; + + for (; i != NumElems; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (Arg != ElementBase) return false; + } + + // Make sure it is a splat of the first vector operand. + return cast(ElementBase)->getValue() < NumElems; +} + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element and it's a 2 or 4 element mask. +bool X86::isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // We can only splat 64-bit, and 32-bit quantities with a single instruction. + if (N->getNumOperands() != 4 && N->getNumOperands() != 2) + return false; + return ::isSplatMask(N); +} + +/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* +/// instructions. +unsigned X86::getShuffleSHUFImmediate(SDNode *N) { + unsigned NumOperands = N->getNumOperands(); + unsigned Shift = (NumOperands == 4) ? 
2 : 1; + unsigned Mask = 0; + for (unsigned i = 0; i < NumOperands; ++i) { + unsigned Val = 0; + SDOperand Arg = N->getOperand(NumOperands-i-1); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast(Arg)->getValue(); + if (Val >= NumOperands) Val -= NumOperands; + Mask |= Val; + if (i != NumOperands - 1) + Mask <<= Shift; + } + + return Mask; +} + +/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW +/// instructions. +unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + unsigned Mask = 0; + // 8 nodes, but we only care about the last 4. + for (unsigned i = 7; i >= 4; --i) { + unsigned Val = 0; + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast(Arg)->getValue(); + Mask |= (Val - 4); + if (i != 4) + Mask <<= 2; + } + + return Mask; +} + +/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW +/// instructions. +unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + unsigned Mask = 0; + // 8 nodes, but we only care about the first 4. + for (int i = 3; i >= 0; --i) { + unsigned Val = 0; + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast(Arg)->getValue(); + Mask |= Val; + if (i != 0) + Mask <<= 2; + } + + return Mask; +} + +/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand +/// specifies a 8 element shuffle that can be broken into a pair of +/// PSHUFHW and PSHUFLW. +static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 8) + return false; + + // Lower quadword shuffled. + for (unsigned i = 0; i != 4; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val > 4) + return false; + } + + // Upper quadword shuffled. + for (unsigned i = 4; i != 8; ++i) { + SDOperand Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val < 4 || Val > 7) + return false; + } + + return true; +} + +/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as +/// values in ther permute mask. +static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { + SDOperand V1 = Op.getOperand(0); + SDOperand V2 = Op.getOperand(1); + SDOperand Mask = Op.getOperand(2); + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType MaskVT = Mask.getValueType(); + MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); + unsigned NumElems = Mask.getNumOperands(); + std::vector MaskVec; + + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); + continue; + } + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + } + + Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); +} + +/// ShouldXformToMOVHLPS - Return true if the node should be transformed to +/// match movhlps. 
The lower half elements should come from upper half of +/// V1 (and in order), and the upper half elements should come from the upper +/// half of V2 (and in order). +static bool ShouldXformToMOVHLPS(SDNode *Mask) { + unsigned NumElems = Mask->getNumOperands(); + if (NumElems != 4) + return false; + for (unsigned i = 0, e = 2; i != e; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i+2)) + return false; + for (unsigned i = 2; i != 4; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i+4)) + return false; + return true; +} + +/// isScalarLoadToVector - Returns true if the node is a scalar load that +/// is promoted to a vector. +static inline bool isScalarLoadToVector(SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { + N = N->getOperand(0).Val; + return (N->getOpcode() == ISD::LOAD); + } + return false; +} + +/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to +/// match movlp{s|d}. The lower half elements should come from lower half of +/// V1 (and in order), and the upper half elements should come from the upper +/// half of V2 (and in order). And since V1 will become the source of the +/// MOVLP, it must be either a vector load or a scalar load to vector. +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { + if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) + return false; + + unsigned NumElems = Mask->getNumOperands(); + if (NumElems != 2 && NumElems != 4) + return false; + for (unsigned i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i)) + return false; + for (unsigned i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) + return false; + return true; +} + +/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are +/// all the same. +static bool isSplatVector(SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + SDOperand SplatValue = N->getOperand(0); + for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i) != SplatValue) + return false; + return true; +} + +/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements +/// that point to V2 points to its first element. +static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { + assert(Mask.getOpcode() == ISD::BUILD_VECTOR); + + bool Changed = false; + std::vector MaskVec; + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Val = cast(Arg)->getValue(); + if (Val > NumElems) { + Arg = DAG.getConstant(NumElems, Arg.getValueType()); + Changed = true; + } } + MaskVec.push_back(Arg); + } + + if (Changed) + Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); + return Mask; +} + +/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd +/// operation of specified width. +static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + + std::vector MaskVec; + MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); + for (unsigned i = 1; i != NumElems; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); +} - // Build the FP_TO_INT*_IN_MEM +/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation +/// of specified width. 
+static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + std::vector MaskVec; + for (unsigned i = 0, e = NumElems/2; i != e; ++i) { + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); +} + +/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation +/// of specified width. +static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + unsigned Half = NumElems/2; + std::vector MaskVec; + for (unsigned i = 0; i != Half; ++i) { + MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); +} + +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { + assert(MVT::isVector(VT) && "Expected a vector type"); + unsigned NumElems = getVectorNumElements(VT); + MVT::ValueType EVT = MVT::getVectorBaseType(VT); + bool isFP = MVT::isFloatingPoint(EVT); + SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); + std::vector ZeroVec(NumElems, Zero); + return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec); +} + +/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. +/// +static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { + SDOperand V1 = Op.getOperand(0); + SDOperand Mask = Op.getOperand(2); + MVT::ValueType VT = Op.getValueType(); + unsigned NumElems = Mask.getNumOperands(); + Mask = getUnpacklMask(NumElems, DAG); + while (NumElems != 4) { + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); + NumElems >>= 1; + } + V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); + + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + Mask = getZeroVector(MaskVT, DAG); + SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, + DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); + return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); +} + +/// isZeroNode - Returns true if Elt is a constant zero or a floating point +/// constant +0.0. +static inline bool isZeroNode(SDOperand Elt) { + return ((isa(Elt) && + cast(Elt)->getValue() == 0) || + (isa(Elt) && + cast(Elt)->isExactlyValue(0.0))); +} + +/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified +/// vector and zero or undef vector. +static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, + unsigned NumElems, unsigned Idx, + bool isZero, SelectionDAG &DAG) { + SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + SDOperand Zero = DAG.getConstant(0, EVT); + std::vector MaskVec(NumElems, Zero); + MaskVec[Idx] = DAG.getConstant(NumElems, EVT); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); +} + +/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
+/// +static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG) { + if (NumNonZero > 8) + return SDOperand(); + + SDOperand V(0, 0); + bool First = true; + for (unsigned i = 0; i < 16; ++i) { + bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; + if (ThisIsNonZero && First) { + if (NumZero) + V = getZeroVector(MVT::v8i16, DAG); + else + V = DAG.getNode(ISD::UNDEF, MVT::v8i16); + First = false; + } + + if ((i & 1) != 0) { + SDOperand ThisElt(0, 0), LastElt(0, 0); + bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; + if (LastIsNonZero) { + LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); + } + if (ThisIsNonZero) { + ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); + ThisElt = DAG.getNode(ISD::SHL, MVT::i16, + ThisElt, DAG.getConstant(8, MVT::i8)); + if (LastIsNonZero) + ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); + } else + ThisElt = LastElt; + + if (ThisElt.Val) + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, + DAG.getConstant(i/2, MVT::i32)); + } + } + + return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); +} + +/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. +/// +static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG) { + if (NumNonZero > 4) + return SDOperand(); + + SDOperand V(0, 0); + bool First = true; + for (unsigned i = 0; i < 8; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v8i16, DAG); + else + V = DAG.getNode(ISD::UNDEF, MVT::v8i16); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), + DAG.getConstant(i, MVT::i32)); + } + } + + return V; +} + +SDOperand +X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { + // All zeros are handled with pxor. + if (ISD::isBuildVectorAllZeros(Op.Val)) + return Op; + + // All ones are handled with pcmpeqd. + if (ISD::isBuildVectorAllOnes(Op.Val)) + return Op; + + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType EVT = MVT::getVectorBaseType(VT); + unsigned EVTBits = MVT::getSizeInBits(EVT); + + unsigned NumElems = Op.getNumOperands(); + unsigned NumZero = 0; + unsigned NumNonZero = 0; + unsigned NonZeros = 0; + std::set<SDOperand> Values; + for (unsigned i = 0; i < NumElems; ++i) { + SDOperand Elt = Op.getOperand(i); + if (Elt.getOpcode() != ISD::UNDEF) { + Values.insert(Elt); + if (isZeroNode(Elt)) + NumZero++; + else { + NonZeros |= (1 << i); + NumNonZero++; + } + } + } + + if (NumNonZero == 0) + // Must be a mix of zero and undef. Return a zero vector. + return getZeroVector(VT, DAG); + + // Splat is obviously ok. Let legalizer expand it to a shuffle. + if (Values.size() == 1) + return SDOperand(); + + // Special case for single non-zero element. + if (NumNonZero == 1) { + unsigned Idx = CountTrailingZeros_32(NonZeros); + SDOperand Item = Op.getOperand(Idx); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); + if (Idx == 0) + // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. + return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, + NumZero > 0, DAG); + + if (EVTBits == 32) { + // Turn it into a shuffle of zero and zero-extended scalar to vector. 
+ Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, + DAG); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + std::vector<SDOperand> MaskVec; + for (unsigned i = 0; i < NumElems; i++) + MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, + DAG.getNode(ISD::UNDEF, VT), Mask); + } + } + + // Let legalizer expand 2-wide build_vectors. + if (EVTBits == 64) + return SDOperand(); + + // If element VT is < 32 bits, convert it to inserts into a zero vector. + if (EVTBits == 8) { + SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG); + if (V.Val) return V; + } + + if (EVTBits == 16) { + SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG); + if (V.Val) return V; + } + + // If element VT is == 32 bits, turn it into a number of shuffles. + std::vector<SDOperand> V(NumElems); + if (NumElems == 4 && NumZero > 0) { + for (unsigned i = 0; i < 4; ++i) { + bool isZero = !(NonZeros & (1 << i)); + if (isZero) + V[i] = getZeroVector(VT, DAG); + else + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); + } + + for (unsigned i = 0; i < 2; ++i) { + switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { + default: break; + case 0: + V[i] = V[i*2]; // Must be a zero vector. + break; + case 1: + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], + getMOVLMask(NumElems, DAG)); + break; + case 2: + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], + getMOVLMask(NumElems, DAG)); + break; + case 3: + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], + getUnpacklMask(NumElems, DAG)); + break; + } + } + + // Take advantage of the fact that R32 to VR128 scalar_to_vector (i.e. movd) + // clears the upper bits. + // FIXME: we can do the same for v4f32 case when we know both parts of + // the lower half come from scalar_to_vector (loadf32). We should do + // that in post legalizer dag combiner with target specific hooks. + if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) + return V[0]; + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + std::vector<SDOperand> MaskVec; + bool Reverse = (NonZeros & 0x3) == 2; + for (unsigned i = 0; i < 2; ++i) + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i, EVT)); + else + MaskVec.push_back(DAG.getConstant(i, EVT)); + Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; + for (unsigned i = 0; i < 2; ++i) + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); + else + MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); + SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); + } + + if (Values.size() > 2) { + // Expand into a number of unpckl*. + // e.g. 
for v4f32 + // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> + // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> + // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); + NumElems >>= 1; + while (NumElems != 0) { + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], + UnpckMask); + NumElems >>= 1; + } + return V[0]; + } + + return SDOperand(); +} + +SDOperand +X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { + SDOperand V1 = Op.getOperand(0); + SDOperand V2 = Op.getOperand(1); + SDOperand PermMask = Op.getOperand(2); + MVT::ValueType VT = Op.getValueType(); + unsigned NumElems = PermMask.getNumOperands(); + bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + + if (isSplatMask(PermMask.Val)) { + if (NumElems <= 4) return Op; + // Promote it to a v4i32 splat. + return PromoteSplat(Op, DAG); + } + + if (X86::isMOVLMask(PermMask.Val)) + return (V1IsUndef) ? V2 : Op; + + if (X86::isMOVSHDUPMask(PermMask.Val) || + X86::isMOVSLDUPMask(PermMask.Val) || + X86::isMOVHLPSMask(PermMask.Val) || + X86::isMOVHPMask(PermMask.Val) || + X86::isMOVLPMask(PermMask.Val)) + return Op; + + if (ShouldXformToMOVHLPS(PermMask.Val) || + ShouldXformToMOVLP(V1.Val, PermMask.Val)) + return CommuteVectorShuffle(Op, DAG); + + bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF; + bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF; + if (V1IsSplat && !V2IsSplat) { + Op = CommuteVectorShuffle(Op, DAG); + V1 = Op.getOperand(0); + V2 = Op.getOperand(1); + PermMask = Op.getOperand(2); + V2IsSplat = true; + } + + if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { + if (V2IsUndef) return V1; + Op = CommuteVectorShuffle(Op, DAG); + V1 = Op.getOperand(0); + V2 = Op.getOperand(1); + PermMask = Op.getOperand(2); + if (V2IsSplat) { + // V2 is a splat, so the mask may be malformed. That is, it may point + // to any V2 element. The instruction selector won't like this. Get + // a corrected mask and commute to form a proper MOVS{S|D}. + SDOperand NewMask = getMOVLMask(NumElems, DAG); + if (NewMask.Val != PermMask.Val) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); + } + return Op; + } + + if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || + X86::isUNPCKLMask(PermMask.Val) || + X86::isUNPCKHMask(PermMask.Val)) + return Op; + + if (V2IsSplat) { + // Normalize mask so all entries that point to V2 point to its first + // element, then try to match unpck{h|l} again. If it matches, return a + // new vector_shuffle with the corrected mask. + SDOperand NewMask = NormalizeMask(PermMask, DAG); + if (NewMask.Val != PermMask.Val) { + if (X86::isUNPCKLMask(PermMask.Val, true)) { + SDOperand NewMask = getUnpacklMask(NumElems, DAG); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); + } else if (X86::isUNPCKHMask(PermMask.Val, true)) { + SDOperand NewMask = getUnpackhMask(NumElems, DAG); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); + } + } + } + + // Normalize the node to match x86 shuffle ops if needed. + if (V2.getOpcode() != ISD::UNDEF) + if (isCommutedSHUFP(PermMask.Val)) { + Op = CommuteVectorShuffle(Op, DAG); + V1 = Op.getOperand(0); + V2 = Op.getOperand(1); + PermMask = Op.getOperand(2); + } + + // If VT is integer, try PSHUF* first, then SHUFP*. 
+ if (MVT::isInteger(VT)) { + if (X86::isPSHUFDMask(PermMask.Val) || + X86::isPSHUFHWMask(PermMask.Val) || + X86::isPSHUFLWMask(PermMask.Val)) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); + return Op; + } + + if (X86::isSHUFPMask(PermMask.Val)) + return Op; + + // Handle v8i16 shuffle high / low shuffle node pair. + if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + std::vector MaskVec; + for (unsigned i = 0; i != 4; ++i) + MaskVec.push_back(PermMask.getOperand(i)); + for (unsigned i = 4; i != 8; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); + MaskVec.clear(); + for (unsigned i = 0; i != 4; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + for (unsigned i = 4; i != 8; ++i) + MaskVec.push_back(PermMask.getOperand(i)); + Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); + } + } else { + // Floating point cases in the other order. + if (X86::isSHUFPMask(PermMask.Val)) + return Op; + if (X86::isPSHUFDMask(PermMask.Val) || + X86::isPSHUFHWMask(PermMask.Val) || + X86::isPSHUFLWMask(PermMask.Val)) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); + return Op; + } + } + + if (NumElems == 4) { + // Break it into (shuffle shuffle_hi, shuffle_lo). + MVT::ValueType MaskVT = PermMask.getValueType(); + MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + std::map > Locs; + std::vector LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); + std::vector HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); + std::vector *MaskPtr = &LoMask; + unsigned MaskIdx = 0; + unsigned LoIdx = 0; + unsigned HiIdx = NumElems/2; + for (unsigned i = 0; i != NumElems; ++i) { + if (i == NumElems/2) { + MaskPtr = &HiMask; + MaskIdx = 1; + LoIdx = 0; + HiIdx = NumElems/2; + } + SDOperand Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { + Locs[i] = std::make_pair(-1, -1); + } else if (cast(Elt)->getValue() < NumElems) { + Locs[i] = std::make_pair(MaskIdx, LoIdx); + (*MaskPtr)[LoIdx] = Elt; + LoIdx++; + } else { + Locs[i] = std::make_pair(MaskIdx, HiIdx); + (*MaskPtr)[HiIdx] = Elt; + HiIdx++; + } + } + + SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask)); + SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask)); + std::vector MaskOps; + for (unsigned i = 0; i != NumElems; ++i) { + if (Locs[i].first == -1) { + MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); + } else { + unsigned Idx = Locs[i].first * NumElems + Locs[i].second; + MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); + } + } + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps)); + } + + return SDOperand(); +} + +SDOperand +X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { + if (!isa(Op.getOperand(1))) + return SDOperand(); + + MVT::ValueType VT = Op.getValueType(); + // TODO: handle v16i8. 
+ if (MVT::getSizeInBits(VT) == 16) { + // Transform it so it matches pextrw which produces a 32-bit result. + MVT::ValueType EVT = (MVT::ValueType)(VT+1); + SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, + Op.getOperand(0), Op.getOperand(1)); + SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, + DAG.getValueType(VT)); + return DAG.getNode(ISD::TRUNCATE, VT, Assert); + } else if (MVT::getSizeInBits(VT) == 32) { + SDOperand Vec = Op.getOperand(0); + unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); + if (Idx == 0) + return Op; + + // SHUFPS the element to the lowest double word, then movss. + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, + MVT::getVectorBaseType(MaskVT)); + std::vector<SDOperand> IdxVec; + IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), + Vec, Vec, Mask); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, + DAG.getConstant(0, MVT::i32)); + } else if (MVT::getSizeInBits(VT) == 64) { + SDOperand Vec = Op.getOperand(0); + unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); + if (Idx == 0) + return Op; + + // UNPCKHPD the element to the lowest double word, then movsd. + // Note if the lower 64 bits of the result of the UNPCKHPD are then stored + // to a f64mem, the whole operation is folded into a single MOVHPDmr. + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + std::vector<SDOperand> IdxVec; + IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), + Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, + DAG.getConstant(0, MVT::i32)); + } + + return SDOperand(); +} + +SDOperand +X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { + // Transform it so it matches pinsrw which expects a 16-bit value in an R32 + // as its second argument. + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); + SDOperand N0 = Op.getOperand(0); + SDOperand N1 = Op.getOperand(1); + SDOperand N2 = Op.getOperand(2); + if (MVT::getSizeInBits(BaseVT) == 16) { + if (N1.getValueType() != MVT::i32) + N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); + if (N2.getValueType() != MVT::i32) + N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); + return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); + } else if (MVT::getSizeInBits(BaseVT) == 32) { + unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); + if (Idx == 0) { + // Use a movss. 
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + std::vector<SDOperand> MaskVec; + MaskVec.push_back(DAG.getConstant(4, BaseVT)); + for (unsigned i = 1; i <= 3; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); + } else { + // Use two pinsrw instructions to insert a 32-bit value. + Idx <<= 1; + if (MVT::isFloatingPoint(N1.getValueType())) { + if (N1.getOpcode() == ISD::LOAD) { + // Just load directly from f32mem to R32. + N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), + N1.getOperand(2)); + } else { + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); + N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); + N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, + DAG.getConstant(0, MVT::i32)); + } + } + N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); + N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, + DAG.getConstant(Idx, MVT::i32)); + N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); + N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, + DAG.getConstant(Idx+1, MVT::i32)); + return DAG.getNode(ISD::BIT_CONVERT, VT, N0); + } + } + + return SDOperand(); +} + +SDOperand +X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { + SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); + return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); +} + +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is +// one of the above-mentioned nodes. It has to be wrapped because otherwise +// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only +// be used to form addressing modes. These wrapped nodes will be selected +// into MOV32ri. +SDOperand +X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), + DAG.getTargetConstantPool(CP->get(), getPointerTy(), + CP->getAlignment())); + if (Subtarget->isTargetDarwin()) { + // With PIC, the address is actually $g + Offset. + if (getTargetMachine().getRelocationModel() == Reloc::PIC) + Result = DAG.getNode(ISD::ADD, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); + } + + return Result; +} + +SDOperand +X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), + DAG.getTargetGlobalAddress(GV, getPointerTy())); + if (Subtarget->isTargetDarwin()) { + // With PIC, the address is actually $g + Offset. + if (getTargetMachine().getRelocationModel() == Reloc::PIC) + Result = DAG.getNode(ISD::ADD, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); + + // For Darwin, external and weak symbols are indirect, so we want to load + // the value at address GV, not the value of GV itself. This means that + // the GlobalAddress must be in the base or index register of the address, + // not the GV offset field. 
+ if (getTargetMachine().getRelocationModel() != Reloc::Static && + DarwinGVRequiresExtraLoad(GV)) + Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), + Result, DAG.getSrcValue(NULL)); + } + + return Result; +} + +SDOperand +X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { + const char *Sym = cast(Op)->getSymbol(); + SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), + DAG.getTargetExternalSymbol(Sym, getPointerTy())); + if (Subtarget->isTargetDarwin()) { + // With PIC, the address is actually $g + Offset. + if (getTargetMachine().getRelocationModel() == Reloc::PIC) + Result = DAG.getNode(ISD::ADD, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); + } + + return Result; +} + +SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { + assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && + "Not an i64 shift!"); + bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; + SDOperand ShOpLo = Op.getOperand(0); + SDOperand ShOpHi = Op.getOperand(1); + SDOperand ShAmt = Op.getOperand(2); + SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, + DAG.getConstant(31, MVT::i8)) + : DAG.getConstant(0, MVT::i32); + + SDOperand Tmp2, Tmp3; + if (Op.getOpcode() == ISD::SHL_PARTS) { + Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); + } else { + Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); + Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); + } + + SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, + ShAmt, DAG.getConstant(32, MVT::i8)); + + SDOperand Hi, Lo; + SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); + + std::vector Tys; + Tys.push_back(MVT::i32); + Tys.push_back(MVT::Flag); + std::vector Ops; + if (Op.getOpcode() == ISD::SHL_PARTS) { + Ops.push_back(Tmp2); + Ops.push_back(Tmp3); + Ops.push_back(CC); + Ops.push_back(InFlag); + Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); + InFlag = Hi.getValue(1); + + Ops.clear(); + Ops.push_back(Tmp3); + Ops.push_back(Tmp1); + Ops.push_back(CC); + Ops.push_back(InFlag); + Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); + } else { + Ops.push_back(Tmp2); + Ops.push_back(Tmp3); + Ops.push_back(CC); + Ops.push_back(InFlag); + Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); + InFlag = Lo.getValue(1); + + Ops.clear(); + Ops.push_back(Tmp3); + Ops.push_back(Tmp1); + Ops.push_back(CC); + Ops.push_back(InFlag); + Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); + } + + Tys.clear(); + Tys.push_back(MVT::i32); + Tys.push_back(MVT::i32); + Ops.clear(); + Ops.push_back(Lo); + Ops.push_back(Hi); + return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); +} + +SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { + assert(Op.getOperand(0).getValueType() <= MVT::i64 && + Op.getOperand(0).getValueType() >= MVT::i16 && + "Unknown SINT_TO_FP to lower!"); + + SDOperand Result; + MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); + unsigned Size = MVT::getSizeInBits(SrcVT)/8; + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, + DAG.getEntryNode(), Op.getOperand(0), + StackSlot, DAG.getSrcValue(NULL)); + + // Build the FILD + std::vector Tys; + Tys.push_back(MVT::f64); + Tys.push_back(MVT::Other); + if (X86ScalarSSE) Tys.push_back(MVT::Flag); + std::vector Ops; + 
Ops.push_back(Chain); + Ops.push_back(StackSlot); + Ops.push_back(DAG.getValueType(SrcVT)); + Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, + Tys, Ops); + + if (X86ScalarSSE) { + Chain = Result.getValue(1); + SDOperand InFlag = Result.getValue(2); + + // FIXME: Currently the FST is flagged to the FILD_FLAG. This + // shouldn't be necessary except that RFP cannot be live across + // multiple blocks. When stackifier is fixed, they can be uncoupled. + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + std::vector Tys; + Tys.push_back(MVT::Other); std::vector Ops; Ops.push_back(Chain); - Ops.push_back(Value); + Ops.push_back(Result); Ops.push_back(StackSlot); - SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); + Ops.push_back(DAG.getValueType(Op.getValueType())); + Ops.push_back(InFlag); + Chain = DAG.getNode(X86ISD::FST, Tys, Ops); + Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, + DAG.getSrcValue(NULL)); + } + + return Result; +} + +SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { + assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && + "Unknown FP_TO_SINT to lower!"); + // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary + // stack slot. + MachineFunction &MF = DAG.getMachineFunction(); + unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; + int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - // Load the result. - return DAG.getLoad(Op.getValueType(), FIST, StackSlot, - DAG.getSrcValue(NULL)); + unsigned Opc; + switch (Op.getValueType()) { + default: assert(0 && "Invalid FP_TO_SINT to lower!"); + case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; + case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; + case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; } - case ISD::READCYCLECOUNTER: { + + SDOperand Chain = DAG.getEntryNode(); + SDOperand Value = Op.getOperand(0); + if (X86ScalarSSE) { + assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, + DAG.getSrcValue(0)); std::vector Tys; + Tys.push_back(MVT::f64); Tys.push_back(MVT::Other); - Tys.push_back(MVT::Flag); std::vector Ops; - Ops.push_back(Op.getOperand(0)); - SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); - Ops.clear(); - Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); - Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, - MVT::i32, Ops[0].getValue(2))); - Ops.push_back(Ops[1].getValue(1)); - Tys[0] = Tys[1] = MVT::i32; - Tys.push_back(MVT::Other); - return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); + Ops.push_back(Chain); + Ops.push_back(StackSlot); + Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); + Value = DAG.getNode(X86ISD::FLD, Tys, Ops); + Chain = Value.getValue(1); + SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } - case ISD::FABS: { - MVT::ValueType VT = Op.getValueType(); - const Type *OpNTy = MVT::getTypeForValueType(VT); - std::vector CV; - if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } else { - CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); - 
CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } - Constant *CS = ConstantStruct::get(CV); - SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - SDOperand Mask - = DAG.getNode(X86ISD::LOAD_PACK, - VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); - return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); - } - case ISD::FNEG: { - MVT::ValueType VT = Op.getValueType(); - const Type *OpNTy = MVT::getTypeForValueType(VT); - std::vector CV; - if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } else { - CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - CV.push_back(ConstantFP::get(OpNTy, 0.0)); - } - Constant *CS = ConstantStruct::get(CV); - SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); - SDOperand Mask - = DAG.getNode(X86ISD::LOAD_PACK, - VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); - return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); - } - case ISD::SETCC: { - assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); - SDOperand Cond; - SDOperand CC = Op.getOperand(2); - ISD::CondCode SetCCOpcode = cast(CC)->get(); - bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); - bool Flip; - unsigned X86CC; - if (translateX86CC(CC, isFP, X86CC, Flip)) { - if (Flip) - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, - Op.getOperand(1), Op.getOperand(0)); - else - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, - Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(X86ISD::SETCC, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond); - } else { - assert(isFP && "Illegal integer SetCC!"); + // Build the FP_TO_INT*_IN_MEM + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(Value); + Ops.push_back(StackSlot); + SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); + + // Load the result. 
+ return DAG.getLoad(Op.getValueType(), FIST, StackSlot, + DAG.getSrcValue(NULL)); +} + +SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + const Type *OpNTy = MVT::getTypeForValueType(VT); + std::vector CV; + if (VT == MVT::f64) { + CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } else { + CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } + Constant *CS = ConstantStruct::get(CV); + SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); + SDOperand Mask + = DAG.getNode(X86ISD::LOAD_PACK, + VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); + return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); +} + +SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + const Type *OpNTy = MVT::getTypeForValueType(VT); + std::vector CV; + if (VT == MVT::f64) { + CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } else { + CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } + Constant *CS = ConstantStruct::get(CV); + SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); + SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, + VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); + return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); +} + +SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { + assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); + SDOperand Cond; + SDOperand CC = Op.getOperand(2); + ISD::CondCode SetCCOpcode = cast(CC)->get(); + bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); + bool Flip; + unsigned X86CC; + if (translateX86CC(CC, isFP, X86CC, Flip)) { + if (Flip) + Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, + Op.getOperand(1), Op.getOperand(0)); + else Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, Op.getOperand(0), Op.getOperand(1)); - std::vector Tys; - std::vector Ops; - switch (SetCCOpcode) { + return DAG.getNode(X86ISD::SETCC, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond); + } else { + assert(isFP && "Illegal integer SetCC!"); + + Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, + Op.getOperand(0), Op.getOperand(1)); + std::vector Tys; + std::vector Ops; + switch (SetCCOpcode) { default: assert(false && "Illegal floating point SetCC!"); case ISD::SETOEQ: { // !PF & ZF Tys.push_back(MVT::i8); @@ -1596,386 +3093,161 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { Tmp1.getValue(1)); return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); } - } } } - case ISD::SELECT: { - MVT::ValueType VT = Op.getValueType(); - bool isFP = MVT::isFloatingPoint(VT); - bool isFPStack = isFP && !X86ScalarSSE; - bool isFPSSE = isFP && X86ScalarSSE; - bool addTest = false; - SDOperand Op0 = Op.getOperand(0); - SDOperand Cond, CC; - if (Op0.getOpcode() == ISD::SETCC) - Op0 = LowerOperation(Op0, DAG); - - if (Op0.getOpcode() == X86ISD::SETCC) { - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). If the X86ISD::SETCC does not - // have another use it will be eliminated. 
- // If the X86ISD::SETCC has more than one use, then it's probably better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason). - if (Op0.getOperand(1).getOpcode() == X86ISD::CMP) { - if (!Op0.hasOneUse()) { - std::vector Tys; - for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) - Tys.push_back(Op0.Val->getValueType(i)); - std::vector Ops; - for (unsigned i = 0; i < Op0.getNumOperands(); ++i) - Ops.push_back(Op0.getOperand(i)); - Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); - } - - CC = Op0.getOperand(0); - Cond = Op0.getOperand(1); - // Make a copy as flag result cannot be used by more than one. - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, - Cond.getOperand(0), Cond.getOperand(1)); - addTest = - isFPStack && !hasFPCMov(cast(CC)->getSignExtended()); - } else - addTest = true; - } else - addTest = true; - - if (addTest) { - CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); - Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); - } +} - std::vector Tys; - Tys.push_back(Op.getValueType()); - Tys.push_back(MVT::Flag); - std::vector Ops; - // X86ISD::CMOV means set the result (which is operand 1) to the RHS if - // condition is true. - Ops.push_back(Op.getOperand(2)); - Ops.push_back(Op.getOperand(1)); - Ops.push_back(CC); - Ops.push_back(Cond); - return DAG.getNode(X86ISD::CMOV, Tys, Ops); - } - case ISD::BRCOND: { - bool addTest = false; - SDOperand Cond = Op.getOperand(1); - SDOperand Dest = Op.getOperand(2); - SDOperand CC; - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerOperation(Cond, DAG); - - if (Cond.getOpcode() == X86ISD::SETCC) { - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). If the X86ISD::SETCC does not - // have another use it will be eliminated. - // If the X86ISD::SETCC has more than one use, then it's probably better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason). - if (Cond.getOperand(1).getOpcode() == X86ISD::CMP) { - if (!Cond.hasOneUse()) { - std::vector Tys; - for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) - Tys.push_back(Cond.Val->getValueType(i)); - std::vector Ops; - for (unsigned i = 0; i < Cond.getNumOperands(); ++i) - Ops.push_back(Cond.getOperand(i)); - Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); - } +SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; + bool addTest = false; + SDOperand Op0 = Op.getOperand(0); + SDOperand Cond, CC; + if (Op0.getOpcode() == ISD::SETCC) + Op0 = LowerOperation(Op0, DAG); + + if (Op0.getOpcode() == X86ISD::SETCC) { + // If condition flag is set by a X86ISD::CMP, then make a copy of it + // (since flag operand cannot be shared). If the X86ISD::SETCC does not + // have another use it will be eliminated. + // If the X86ISD::SETCC has more than one use, then it's probably better + // to use a test instead of duplicating the X86ISD::CMP (for register + // pressure reason). 
+ unsigned CmpOpc = Op0.getOperand(1).getOpcode(); + if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || + CmpOpc == X86ISD::UCOMI) { + if (!Op0.hasOneUse()) { + std::vector Tys; + for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) + Tys.push_back(Op0.Val->getValueType(i)); + std::vector Ops; + for (unsigned i = 0; i < Op0.getNumOperands(); ++i) + Ops.push_back(Op0.getOperand(i)); + Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); + } - CC = Cond.getOperand(0); - Cond = Cond.getOperand(1); - // Make a copy as flag result cannot be used by more than one. - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, - Cond.getOperand(0), Cond.getOperand(1)); - } else - addTest = true; + CC = Op0.getOperand(0); + Cond = Op0.getOperand(1); + // Make a copy as flag result cannot be used by more than one. + Cond = DAG.getNode(CmpOpc, MVT::Flag, + Cond.getOperand(0), Cond.getOperand(1)); + addTest = + isFPStack && !hasFPCMov(cast(CC)->getSignExtended()); } else addTest = true; + } else + addTest = true; - if (addTest) { - CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); - Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); - } - return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), - Op.getOperand(0), Op.getOperand(2), CC, Cond); - } - case ISD::MEMSET: { - SDOperand InFlag(0, 0); - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; - - ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned, call memset if size is less than the threshold. - // It knows how to align to the right boundary first. - if ((Align & 3) != 0 || - !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) { - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData().getIntPtrType(); - std::vector > Args; - Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); - // Extend the ubyte argument to be an int value for the call. - SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); - Args.push_back(std::make_pair(Val, IntPtrTy)); - Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); - std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, - DAG.getExternalSymbol("memset", IntPtr), Args, DAG); - return CallResult.second; - } - - MVT::ValueType AVT; - SDOperand Count; - ConstantSDNode *ValC = dyn_cast(Op.getOperand(2)); - unsigned BytesLeft = 0; - if (ValC) { - unsigned ValReg; - unsigned Val = ValC->getValue() & 255; - - // If the value is a constant, then we can potentially use larger sets. 
- switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - BytesLeft = I->getValue() % 2; - Val = (Val << 8) | Val; - ValReg = X86::AX; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - BytesLeft = I->getValue() % 4; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - ValReg = X86::EAX; - break; - default: // Byte aligned - AVT = MVT::i8; - Count = Op.getOperand(3); - ValReg = X86::AL; - break; - } - - Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), - InFlag); - InFlag = Chain.getValue(1); - } else { - AVT = MVT::i8; - Count = Op.getOperand(3); - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); - InFlag = Chain.getValue(1); - } - - Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getNode(X86ISD::REP_STOS, MVT::Other, Chain, - DAG.getValueType(AVT), InFlag); - - if (BytesLeft) { - // Issue stores for the last 1 - 3 bytes. - SDOperand Value; - unsigned Val = ValC->getValue() & 255; - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType AddrVT = DstAddr.getValueType(); - if (BytesLeft >= 2) { - Value = DAG.getConstant((Val << 8) | Val, MVT::i16); - Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - DAG.getSrcValue(NULL)); - BytesLeft -= 2; - Offset += 2; - } - - if (BytesLeft == 1) { - Value = DAG.getConstant(Val, MVT::i8); - Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - DAG.getSrcValue(NULL)); - } - } - - return Chain; - } - case ISD::MEMCPY: { - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; - - ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned, call memcpy if size is less than the threshold. - // It knows how to align to the right boundary first. 
- if ((Align & 3) != 0 || - !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) { - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData().getIntPtrType(); - std::vector > Args; - Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); - Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); - Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); - std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, - DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); - return CallResult.second; - } - - MVT::ValueType AVT; - SDOperand Count; - unsigned BytesLeft = 0; - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - BytesLeft = I->getValue() % 2; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - BytesLeft = I->getValue() % 4; - break; - default: // Byte aligned - AVT = MVT::i8; - Count = Op.getOperand(3); - break; - } + if (addTest) { + CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); + Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); + } - SDOperand InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getNode(X86ISD::REP_MOVS, MVT::Other, Chain, - DAG.getValueType(AVT), InFlag); - - if (BytesLeft) { - // Issue loads and stores for the last 1 - 3 bytes. - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType DstVT = DstAddr.getValueType(); - SDOperand SrcAddr = Op.getOperand(2); - MVT::ValueType SrcVT = SrcAddr.getValueType(); - SDOperand Value; - if (BytesLeft >= 2) { - Value = DAG.getLoad(MVT::i16, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - DAG.getSrcValue(NULL)); - Chain = Value.getValue(1); - Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - DAG.getSrcValue(NULL)); - BytesLeft -= 2; - Offset += 2; - } + std::vector Tys; + Tys.push_back(Op.getValueType()); + Tys.push_back(MVT::Flag); + std::vector Ops; + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if + // condition is true. + Ops.push_back(Op.getOperand(2)); + Ops.push_back(Op.getOperand(1)); + Ops.push_back(CC); + Ops.push_back(Cond); + return DAG.getNode(X86ISD::CMOV, Tys, Ops); +} - if (BytesLeft == 1) { - Value = DAG.getLoad(MVT::i8, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - DAG.getSrcValue(NULL)); - Chain = Value.getValue(1); - Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - DAG.getSrcValue(NULL)); +SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { + bool addTest = false; + SDOperand Cond = Op.getOperand(1); + SDOperand Dest = Op.getOperand(2); + SDOperand CC; + if (Cond.getOpcode() == ISD::SETCC) + Cond = LowerOperation(Cond, DAG); + + if (Cond.getOpcode() == X86ISD::SETCC) { + // If condition flag is set by a X86ISD::CMP, then make a copy of it + // (since flag operand cannot be shared). If the X86ISD::SETCC does not + // have another use it will be eliminated. 
+ // If the X86ISD::SETCC has more than one use, then it's probably better + // to use a test instead of duplicating the X86ISD::CMP (for register + // pressure reason). + unsigned CmpOpc = Cond.getOperand(1).getOpcode(); + if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || + CmpOpc == X86ISD::UCOMI) { + if (!Cond.hasOneUse()) { + std::vector Tys; + for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) + Tys.push_back(Cond.Val->getValueType(i)); + std::vector Ops; + for (unsigned i = 0; i < Cond.getNumOperands(); ++i) + Ops.push_back(Cond.getOperand(i)); + Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); } - } - return Chain; - } - - // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their - // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is - // one of the above mentioned nodes. It has to be wrapped because otherwise - // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only - // be used to form addressing mode. These wrapped nodes will be selected - // into MOV32ri. - case ISD::ConstantPool: { - ConstantPoolSDNode *CP = cast(Op); - SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), - DAG.getTargetConstantPool(CP->get(), getPointerTy(), - CP->getAlignment())); - if (getTargetMachine().getSubtarget().isTargetDarwin()) { - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC) - Result = DAG.getNode(ISD::ADD, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); - } + CC = Cond.getOperand(0); + Cond = Cond.getOperand(1); + // Make a copy as flag result cannot be used by more than one. + Cond = DAG.getNode(CmpOpc, MVT::Flag, + Cond.getOperand(0), Cond.getOperand(1)); + } else + addTest = true; + } else + addTest = true; - return Result; - } - case ISD::GlobalAddress: { - GlobalValue *GV = cast(Op)->getGlobal(); - SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), - DAG.getTargetGlobalAddress(GV, getPointerTy())); - if (getTargetMachine(). - getSubtarget().isTargetDarwin()) { - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC) - Result = DAG.getNode(ISD::ADD, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); + if (addTest) { + CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); + Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); + } + return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), + Op.getOperand(0), Op.getOperand(2), CC, Cond); +} - // For Darwin, external and weak symbols are indirect, so we want to load - // the value at address GV, not the value of GV itself. This means that - // the GlobalAddress must be in the base or index register of the address, - // not the GV offset field. - if (getTargetMachine().getRelocationModel() != Reloc::Static && - (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || - (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()))) - Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), - Result, DAG.getSrcValue(NULL)); - } +SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { + JumpTableSDNode *JT = cast(Op); + SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), + DAG.getTargetJumpTable(JT->getIndex(), + getPointerTy())); + if (Subtarget->isTargetDarwin()) { + // With PIC, the address is actually $g + Offset. 
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC) + Result = DAG.getNode(ISD::ADD, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); + } - return Result; - } - case ISD::ExternalSymbol: { - const char *Sym = cast(Op)->getSymbol(); - SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), - DAG.getTargetExternalSymbol(Sym, getPointerTy())); - if (getTargetMachine(). - getSubtarget().isTargetDarwin()) { - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC) - Result = DAG.getNode(ISD::ADD, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); - } + return Result; +} - return Result; - } - case ISD::VASTART: { - // vastart just stores the address of the VarArgsFrameIndex slot into the - // memory location argument. - // FIXME: Replace MVT::i32 with PointerTy - SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); - return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, - Op.getOperand(1), Op.getOperand(2)); - } - case ISD::RET: { - SDOperand Copy; +SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { + SDOperand Copy; - switch(Op.getNumOperands()) { + switch(Op.getNumOperands()) { default: assert(0 && "Do not know how to return this many arguments!"); abort(); - case 1: + case 1: // ret void. return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), - DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); + DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); case 2: { MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); - if (MVT::isInteger(ArgVT)) + + if (MVT::isVector(ArgVT)) { + // Integer or FP vector result -> XMM0. + if (DAG.getMachineFunction().liveout_empty()) + DAG.getMachineFunction().addLiveOut(X86::XMM0); + Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), + SDOperand()); + } else if (MVT::isInteger(ArgVT)) { + // Integer result -> EAX + if (DAG.getMachineFunction().liveout_empty()) + DAG.getMachineFunction().addLiveOut(X86::EAX); + Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), SDOperand()); - else if (!X86ScalarSSE) { + } else if (!X86ScalarSSE) { + // FP return with fp-stack value. + if (DAG.getMachineFunction().liveout_empty()) + DAG.getMachineFunction().addLiveOut(X86::ST0); + std::vector Tys; Tys.push_back(MVT::Other); Tys.push_back(MVT::Flag); @@ -1984,6 +3256,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(Op.getOperand(1)); Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); } else { + // FP return with ScalarSSE (return on fp-stack). 
+ if (DAG.getMachineFunction().liveout_empty()) + DAG.getMachineFunction().addLiveOut(X86::ST0); + SDOperand MemLoc; SDOperand Chain = Op.getOperand(0); SDOperand Value = Op.getOperand(1); @@ -2020,18 +3296,456 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { break; } case 3: + if (DAG.getMachineFunction().liveout_empty()) { + DAG.getMachineFunction().addLiveOut(X86::EAX); + DAG.getMachineFunction().addLiveOut(X86::EDX); + } + Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), SDOperand()); Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); break; + } + return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, + Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), + Copy.getValue(1)); +} + +SDOperand +X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { + if (FormalArgs.size() == 0) { + unsigned CC = cast(Op.getOperand(0))->getValue(); + if (CC == CallingConv::Fast && EnableFastCC) + LowerFastCCArguments(Op, DAG); + else + LowerCCCArguments(Op, DAG); + } + return FormalArgs[Op.ResNo]; +} + +SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { + SDOperand InFlag(0, 0); + SDOperand Chain = Op.getOperand(0); + unsigned Align = + (unsigned)cast(Op.getOperand(4))->getValue(); + if (Align == 0) Align = 1; + + ConstantSDNode *I = dyn_cast(Op.getOperand(3)); + // If not DWORD aligned, call memset if size is less than the threshold. + // It knows how to align to the right boundary first. + if ((Align & 3) != 0 || + (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData().getIntPtrType(); + std::vector > Args; + Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); + // Extend the ubyte argument to be an int value for the call. + SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); + Args.push_back(std::make_pair(Val, IntPtrTy)); + Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); + std::pair CallResult = + LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, + DAG.getExternalSymbol("memset", IntPtr), Args, DAG); + return CallResult.second; + } + + MVT::ValueType AVT; + SDOperand Count; + ConstantSDNode *ValC = dyn_cast(Op.getOperand(2)); + unsigned BytesLeft = 0; + bool TwoRepStos = false; + if (ValC) { + unsigned ValReg; + unsigned Val = ValC->getValue() & 255; + + // If the value is a constant, then we can potentially use larger sets. 
+ switch (Align & 3) { + case 2: // WORD aligned + AVT = MVT::i16; + Count = DAG.getConstant(I->getValue() / 2, MVT::i32); + BytesLeft = I->getValue() % 2; + Val = (Val << 8) | Val; + ValReg = X86::AX; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + if (I) { + Count = DAG.getConstant(I->getValue() / 4, MVT::i32); + BytesLeft = I->getValue() % 4; + } else { + Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), + DAG.getConstant(2, MVT::i8)); + TwoRepStos = true; + } + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + ValReg = X86::EAX; + break; + default: // Byte aligned + AVT = MVT::i8; + Count = Op.getOperand(3); + ValReg = X86::AL; + break; + } + + Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), + InFlag); + InFlag = Chain.getValue(1); + } else { + AVT = MVT::i8; + Count = Op.getOperand(3); + Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); + InFlag = Chain.getValue(1); + } + + Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); + InFlag = Chain.getValue(1); + + std::vector Tys; + Tys.push_back(MVT::Other); + Tys.push_back(MVT::Flag); + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getValueType(AVT)); + Ops.push_back(InFlag); + Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); + + if (TwoRepStos) { + InFlag = Chain.getValue(1); + Count = Op.getOperand(3); + MVT::ValueType CVT = Count.getValueType(); + SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, + DAG.getConstant(3, CVT)); + Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); + InFlag = Chain.getValue(1); + Tys.clear(); + Tys.push_back(MVT::Other); + Tys.push_back(MVT::Flag); + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(DAG.getValueType(MVT::i8)); + Ops.push_back(InFlag); + Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); + } else if (BytesLeft) { + // Issue stores for the last 1 - 3 bytes. + SDOperand Value; + unsigned Val = ValC->getValue() & 255; + unsigned Offset = I->getValue() - BytesLeft; + SDOperand DstAddr = Op.getOperand(1); + MVT::ValueType AddrVT = DstAddr.getValueType(); + if (BytesLeft >= 2) { + Value = DAG.getConstant((Val << 8) | Val, MVT::i16); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, AddrVT, DstAddr, + DAG.getConstant(Offset, AddrVT)), + DAG.getSrcValue(NULL)); + BytesLeft -= 2; + Offset += 2; + } + + if (BytesLeft == 1) { + Value = DAG.getConstant(Val, MVT::i8); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, AddrVT, DstAddr, + DAG.getConstant(Offset, AddrVT)), + DAG.getSrcValue(NULL)); + } + } + + return Chain; +} + +SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { + SDOperand Chain = Op.getOperand(0); + unsigned Align = + (unsigned)cast(Op.getOperand(4))->getValue(); + if (Align == 0) Align = 1; + + ConstantSDNode *I = dyn_cast(Op.getOperand(3)); + // If not DWORD aligned, call memcpy if size is less than the threshold. + // It knows how to align to the right boundary first. 
+ if ((Align & 3) != 0 || + (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData().getIntPtrType(); + std::vector > Args; + Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); + Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); + Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); + std::pair CallResult = + LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, + DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); + return CallResult.second; + } + + MVT::ValueType AVT; + SDOperand Count; + unsigned BytesLeft = 0; + bool TwoRepMovs = false; + switch (Align & 3) { + case 2: // WORD aligned + AVT = MVT::i16; + Count = DAG.getConstant(I->getValue() / 2, MVT::i32); + BytesLeft = I->getValue() % 2; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + if (I) { + Count = DAG.getConstant(I->getValue() / 4, MVT::i32); + BytesLeft = I->getValue() % 4; + } else { + Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), + DAG.getConstant(2, MVT::i8)); + TwoRepMovs = true; + } + break; + default: // Byte aligned + AVT = MVT::i8; + Count = Op.getOperand(3); + break; + } + + SDOperand InFlag(0, 0); + Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); + InFlag = Chain.getValue(1); + + std::vector Tys; + Tys.push_back(MVT::Other); + Tys.push_back(MVT::Flag); + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getValueType(AVT)); + Ops.push_back(InFlag); + Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); + + if (TwoRepMovs) { + InFlag = Chain.getValue(1); + Count = Op.getOperand(3); + MVT::ValueType CVT = Count.getValueType(); + SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, + DAG.getConstant(3, CVT)); + Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); + InFlag = Chain.getValue(1); + Tys.clear(); + Tys.push_back(MVT::Other); + Tys.push_back(MVT::Flag); + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(DAG.getValueType(MVT::i8)); + Ops.push_back(InFlag); + Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); + } else if (BytesLeft) { + // Issue loads and stores for the last 1 - 3 bytes. 
+ unsigned Offset = I->getValue() - BytesLeft;
+ SDOperand DstAddr = Op.getOperand(1);
+ MVT::ValueType DstVT = DstAddr.getValueType();
+ SDOperand SrcAddr = Op.getOperand(2);
+ MVT::ValueType SrcVT = SrcAddr.getValueType();
+ SDOperand Value;
+ if (BytesLeft >= 2) {
+ Value = DAG.getLoad(MVT::i16, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getSrcValue(NULL));
+ Chain = Value.getValue(1);
+ Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getSrcValue(NULL));
+ BytesLeft -= 2;
+ Offset += 2;
+ }
+
+ if (BytesLeft == 1) {
+ Value = DAG.getLoad(MVT::i8, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getSrcValue(NULL));
+ Chain = Value.getValue(1);
+ Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getSrcValue(NULL));
+ }
+ }
+
+ return Chain;
+}
+
+SDOperand
+X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+ std::vector<MVT::ValueType> Tys;
+ Tys.push_back(MVT::Other);
+ Tys.push_back(MVT::Flag);
+ std::vector<SDOperand> Ops;
+ Ops.push_back(Op.getOperand(0));
+ SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
+ Ops.clear();
+ Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
+ Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
+ MVT::i32, Ops[0].getValue(2)));
+ Ops.push_back(Ops[1].getValue(1));
+ Tys[0] = Tys[1] = MVT::i32;
+ Tys.push_back(MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
+}
+
+SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ // FIXME: Replace MVT::i32 with PointerTy
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
+ return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+SDOperand
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
+ switch (IntNo) {
+ default: return SDOperand(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_comieq_sd: + case Intrinsic::x86_sse2_comilt_sd: + case Intrinsic::x86_sse2_comile_sd: + case Intrinsic::x86_sse2_comigt_sd: + case Intrinsic::x86_sse2_comige_sd: + case Intrinsic::x86_sse2_comineq_sd: + case Intrinsic::x86_sse2_ucomieq_sd: + case Intrinsic::x86_sse2_ucomilt_sd: + case Intrinsic::x86_sse2_ucomile_sd: + case Intrinsic::x86_sse2_ucomigt_sd: + case Intrinsic::x86_sse2_ucomige_sd: + case Intrinsic::x86_sse2_ucomineq_sd: { + unsigned Opc = 0; + ISD::CondCode CC = ISD::SETCC_INVALID; + switch (IntNo) { + default: break; + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse2_comieq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse2_comilt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse2_comile_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse2_comigt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse2_comige_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse2_comineq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETNE; + break; + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse2_ucomieq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse2_ucomilt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse2_ucomile_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse2_ucomigt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse2_ucomige_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_ucomineq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETNE; + break; } - return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, - Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), - Copy.getValue(1)); + bool Flip; + unsigned X86CC; + translateX86CC(CC, true, X86CC, Flip); + SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), + Op.getOperand(Flip?1:2)); + SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond); + return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); } } } +/// LowerOperation - Provide custom lowering hooks for some operations. 
+///
+SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::MEMSET: return LowerMEMSET(Op, DAG);
+ case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ }
+}
+
 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
 switch (Opcode) {
 default: return NULL;
@@ -2053,6 +3767,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
 case X86ISD::CMP: return "X86ISD::CMP";
 case X86ISD::TEST: return "X86ISD::TEST";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
 case X86ISD::SETCC: return "X86ISD::SETCC";
 case X86ISD::CMOV: return "X86ISD::CMOV";
 case X86ISD::BRCOND: return "X86ISD::BRCOND";
@@ -2062,6 +3778,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
 case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::S2VEC: return "X86ISD::S2VEC";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
 }
 }
@@ -2070,14 +3789,17 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
 uint64_t &KnownZero,
 uint64_t &KnownOne,
 unsigned Depth) const {
- unsigned Opc = Op.getOpcode();
- KnownZero = KnownOne = 0; // Don't know anything.
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = 0; // Don't know anything.
 switch (Opc) {
- default:
- assert(Opc >= ISD::BUILTIN_OP_END && "Expected a target specific node");
- break;
+ default: break;
 case X86ISD::SETCC:
 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
 break;
@@ -2119,3 +3841,53 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
 return std::vector<unsigned>();
 }
+
+/// isLegalAddressImmediate - Return true if the integer value or
+/// GlobalValue can be used as the offset of the target addressing mode.
+bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
+ // X86 allows a sign-extended 32-bit immediate field.
+ return (V > -(1LL << 32) && V < (1LL << 32)-1);
+}
+
+bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RModel = getTargetMachine().getRelocationModel();
+ if (RModel == Reloc::Static)
+ return true;
+ else if (RModel == Reloc::DynamicNoPIC)
+ return !DarwinGVRequiresExtraLoad(GV);
+ else
+ return false;
+ } else
+ return true;
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
+ // Only do shuffles on 128-bit vector types for now.
+ if (MVT::getSizeInBits(VT) == 64) return false;
+ return (Mask.Val->getNumOperands() <= 4 ||
+ isSplatMask(Mask.Val) ||
+ isPSHUFHW_PSHUFLWMask(Mask.Val) ||
+ X86::isUNPCKLMask(Mask.Val) ||
+ X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
+ X86::isUNPCKHMask(Mask.Val));
+}
+
+bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
+ MVT::ValueType EVT,
+ SelectionDAG &DAG) const {
+ unsigned NumElts = BVOps.size();
+ // Only do shuffles on 128-bit vector types for now.
+ if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
+ if (NumElts == 2) return true;
+ if (NumElts == 4) {
+ return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
+ isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
+ }
+ return false;
+}
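
For reference, the arithmetic behind the DWORD-aligned REP_STOS path in the MEMSET lowering above can be sketched on its own: the byte value is replicated into a 32-bit pattern for EAX, the first rep stos covers len >> 2 dwords, and (when the length is not a compile-time constant) a second byte-sized rep stos covers the remaining len & 3 bytes. The snippet below is a standalone illustration under those assumptions, not code from this patch; RepStosPlan and planDwordMemset are invented names.

// Illustrative sketch only -- not part of the patch above.
#include <cstdint>
#include <cstdio>

struct RepStosPlan {
  uint32_t Pattern;    // value copied into EAX before "rep stosd"
  uint64_t DwordCount; // ECX for the first rep (dword stores)
  uint64_t TailBytes;  // ECX for the second rep (byte stores)
};

static RepStosPlan planDwordMemset(uint8_t Val, uint64_t Len) {
  uint32_t Pattern = Val;
  Pattern = (Pattern << 8) | Pattern;   // 0x41 -> 0x4141
  Pattern = (Pattern << 16) | Pattern;  // 0x4141 -> 0x41414141
  return RepStosPlan{Pattern, Len >> 2, Len & 3};
}

int main() {
  RepStosPlan P = planDwordMemset(0x41, 1027);
  // Expected: pattern 0x41414141, 256 dword stores, 3 trailing byte stores.
  std::printf("pattern=%#x dwords=%llu tail=%llu\n",
              (unsigned)P.Pattern,
              (unsigned long long)P.DwordCount,
              (unsigned long long)P.TailBytes);
  return 0;
}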
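
Likewise, when the copy length is a known constant, the MEMCPY lowering finishes the 1 - 3 bytes left uncovered by the dword-sized rep movs with at most one 16-bit and one 8-bit load/store pair, both at offset len - BytesLeft. A minimal standalone sketch of that tail handling; copyTail is an invented name, not code from the patch.

// Illustrative sketch only -- not part of the patch above.
#include <cstdint>
#include <cstring>

static void copyTail(uint8_t *Dst, const uint8_t *Src, uint64_t Len) {
  uint64_t BytesLeft = Len & 3;          // bytes "rep movsd" did not cover
  uint64_t Offset = Len - BytesLeft;
  if (BytesLeft >= 2) {
    std::memcpy(Dst + Offset, Src + Offset, 2);  // the i16 load/store pair
    BytesLeft -= 2;
    Offset += 2;
  }
  if (BytesLeft == 1)
    std::memcpy(Dst + Offset, Src + Offset, 1);  // the i8 load/store pair
}

For a 1027-byte copy, copyTail touches bytes 1024-1026, matching the one 16-bit and one 8-bit move the lowering would emit after 256 dword moves.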
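
Finally, the READCYCLECOUNTER lowering returns the two 32-bit halves that RDTSC leaves in EAX (low) and EDX (high); a caller that wants a single 64-bit cycle count combines them as sketched below. combineTsc is an invented name, not part of the patch.

// Illustrative sketch only -- not part of the patch above.
#include <cstdint>

static inline uint64_t combineTsc(uint32_t Eax, uint32_t Edx) {
  return (static_cast<uint64_t>(Edx) << 32) | Eax;  // EDX:EAX as one value
}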