X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=35c9056d536b56a7e2b61d4bd691896544cdd26b;hb=d1474d09cbe5fdeec8ba0d6c6b52f316f3422532;hp=cf3a6ce3b50301eb0cf4ca68f9da31c01343ea94;hpb=2ee3db3003deb18461a72b82166d8d417925d06f;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cf3a6ce3b50..35c9056d536 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15,37 +15,38 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" -#include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -54,8 +55,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) X86ScalarSSEf32 = Subtarget->hasSSE1(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; - bool Fast = false; - RegInfo = TM.getRegisterInfo(); TD = getTargetData(); @@ -115,16 +114,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); - } else { - if (!UseSoftFloat && !NoImplicitFloat && X86ScalarSSEf64) { + } else if (!UseSoftFloat) { + if (X86ScalarSSEf64) { // We have an impenetrably clever algorithm for ui64->double only. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - - // We have faster algorithm for ui32->single only. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); - } else { - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); } + // We have an algorithm for SSE2, and we turn this into a 64-bit + // FILD for other targets. 
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have @@ -132,7 +129,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!UseSoftFloat && !NoImplicitFloat) { + if (!UseSoftFloat) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -175,15 +172,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - } else { + } else if (!UseSoftFloat) { if (X86ScalarSSEf32 && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); else - // With SSE3 we can use fisttpll to convert to a signed i64. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + // With SSE3 we can use fisttpll to convert to a signed i64; without + // SSE, we're stuck with a fistpll. + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. @@ -410,16 +408,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // cases we handle. addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps - - // Floating truncations from f80 and extensions to f80 go through memory. - // If optimizing, we lie about this though and handle it in - // InstructionSelectPreprocess so that dagcombine2 can hack on these. - if (Fast) { - setConvertAction(MVT::f32, MVT::f80, Expand); - setConvertAction(MVT::f64, MVT::f80, Expand); - setConvertAction(MVT::f80, MVT::f32, Expand); - setConvertAction(MVT::f80, MVT::f64, Expand); - } } else if (!UseSoftFloat && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. @@ -449,18 +437,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - // SSE <-> X87 conversions go through memory. If optimizing, we lie about - // this though and handle it in InstructionSelectPreprocess so that - // dagcombine2 can hack on these. - if (Fast) { - setConvertAction(MVT::f32, MVT::f64, Expand); - setConvertAction(MVT::f32, MVT::f80, Expand); - setConvertAction(MVT::f80, MVT::f32, Expand); - setConvertAction(MVT::f64, MVT::f32, Expand); - // And x87->x87 truncations also. - setConvertAction(MVT::f80, MVT::f64, Expand); - } - if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); @@ -476,15 +452,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - // Floating truncations go through memory. If optimizing, we lie about - // this though and handle it in InstructionSelectPreprocess so that - // dagcombine2 can hack on these. 
- if (Fast) { - setConvertAction(MVT::f80, MVT::f32, Expand); - setConvertAction(MVT::f64, MVT::f32, Expand); - setConvertAction(MVT::f80, MVT::f64, Expand); - } - if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); @@ -557,6 +524,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); @@ -585,6 +553,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand); } // FIXME: In order to prevent SSE instructions being expanded to MMX ones @@ -730,6 +702,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; + // Do not attempt to custom lower non-128-bit vectors + if (!VT.is128BitVector()) + continue; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -748,17 +723,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
- for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64); + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to promote non-128-bit vectors + if (!VT.is128BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -769,6 +750,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v2f64, Custom); setOperationAction(ISD::SELECT, MVT::v2i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + if (!DisableMMX && Subtarget->hasMMX()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); + } } if (Subtarget->hasSSE41()) { @@ -799,6 +786,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); } + if (!UseSoftFloat && Subtarget->hasAVX()) { + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + + setOperationAction(ISD::LOAD, MVT::v8f32, Legal); + setOperationAction(ISD::LOAD, MVT::v8i32, Legal); + setOperationAction(ISD::LOAD, MVT::v4f64, Legal); + setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); + setOperationAction(ISD::FSUB, MVT::v8f32, Legal); + setOperationAction(ISD::FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); + //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); + //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); + + // Operations to consider commented out -v16i16 v32i8 + //setOperationAction(ISD::ADD, MVT::v16i16, Legal); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); + //setOperationAction(ISD::SUB, MVT::v32i8, Legal); + //setOperationAction(ISD::SUB, 
MVT::v16i16, Legal); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + + setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); + // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); + // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); + + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); + // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. + // This includes 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to custom lower non-power-of-2 vectors + if (!isPowerOf2_32(VT.getVectorNumElements())) + continue; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + } +#endif + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. + // Including 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + if (!VT.is256BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); +#endif + } + // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -813,8 +908,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -831,6 +924,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::MEMBARRIER); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -843,6 +937,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores allowUnalignedMemoryAccesses = true; // x86 supports it! setPrefLoopAlignment(16); + benefitFromCodePlacementOpt = true; } @@ -902,11 +997,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { /// determining it. MVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr) const { + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. - if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) { + const Function *F = DAG.getMachineFunction().getFunction(); + bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) { if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) return MVT::v4i32; if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) @@ -923,7 +1021,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (usesGlobalOffsetTable()) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); - if (!Subtarget->isPICStyleRIPRel()) + if (!Subtarget->is64Bit()) // This doesn't have DebugLoc associated with it, but is not really the // same as a Register. return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), @@ -931,6 +1029,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 
1 : 4; +} + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -945,7 +1048,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, DAG.getContext()); CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86); // If this is the first return lowered for this function, add the regs to the @@ -965,7 +1068,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && (cast(TargetAddress)->getReg() == X86::EAX || - cast(TargetAddress)->getReg() == X86::R9)) || + cast(TargetAddress)->getReg() == X86::R11)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && "Expecting an global address, external symbol, or register"); @@ -1071,7 +1174,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, SmallVector RVLocs; bool isVarArg = TheCall->isVarArg(); bool Is64Bit = Subtarget->is64Bit(); - CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), + RVLocs, DAG.getContext()); CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); SmallVector ResultVals; @@ -1084,8 +1188,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) { - cerr << "SSE register return with SSE disabled\n"; - exit(1); + llvm_report_error("SSE register return with SSE disabled"); } // If this is a call to a function that returns an fp value on the floating @@ -1147,17 +1250,6 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, // For info on fast calling convention see Fast Calling Convention (tail call) // implementation LowerX86_32FastCCCallTo. -/// AddLiveIn - This helper function adds the specified physical register to the -/// MachineFunction as a live in value. It also creates a corresponding virtual -/// register for it. -static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, - const TargetRegisterClass *RC) { - assert(RC->contains(PReg) && "Not the correct regclass!"); - unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); - MF.getRegInfo().addLiveIn(PReg, VReg); - return VReg; -} - /// CallIsStructReturn - Determines whether a CALL node uses struct return /// semantics. static bool CallIsStructReturn(CallSDNode *TheCall) { @@ -1203,8 +1295,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_64_TailCall; else return CC_X86_64_C; } @@ -1230,23 +1320,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) { } -/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer -/// in a register before calling. 
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) { - return !IsTailCall && !Is64Bit && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT(); -} - -/// CallRequiresFnAddressInReg - Check whether the call requires the function -/// address to be loaded in a register. -bool -X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) { - return !Is64Bit && IsTailCall && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT(); -} - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1311,7 +1384,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext()); CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC)); SmallVector ArgValues; @@ -1356,7 +1429,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { assert(0 && "Unknown argument type!"); } - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1449,11 +1522,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, TotalNumXMMRegs); + bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); - assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) && + assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1()) + if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. 
TotalNumXMMRegs = 0; @@ -1472,8 +1546,8 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(VarArgsGPOffset)); for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { - unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass); + unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1487,8 +1561,8 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(VarArgsFPOffset)); for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { - unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], - X86::VR128RegisterClass); + unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1605,7 +1679,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext()); CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC)); // Get a count of how many bytes are to be pushed on the stack. @@ -1678,9 +1752,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getUNDEF(MVT::v2i64), Arg, - getMOVLMask(2, DAG, dl)); + Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); break; } } @@ -1714,30 +1786,34 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } - // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. - if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) { - Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); - } - // If we are tail calling and generating PIC/GOT style code load the address - // of the callee into ecx. The value in ecx is used as target of the tail - // jump. This is done to circumvent the ebx/callee-saved problem for tail - // calls on PIC/GOT architectures. Normally we would just put the address of - // GOT into ebx and then call target@PLT. But for tail callss ebx would be - // restored (since ebx is callee saved) before jumping to the target@PLT. - if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { - // Note: The actual moving to ecx is done further down. - GlobalAddressSDNode *G = dyn_cast(Callee); - if (G && !G->getGlobal()->hasHiddenVisibility() && - !G->getGlobal()->hasProtectedVisibility()) - Callee = LowerGlobalAddress(Callee, DAG); - else if (isa(Callee)) - Callee = LowerExternalSymbol(Callee,DAG); + + if (Subtarget->isPICStyleGOT()) { + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. 
+ if (!IsTailCall) { + Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), + getPointerTy()), + InFlag); + InFlag = Chain.getValue(1); + } else { + // If we are tail calling and generating PIC/GOT style code load the + // address of the callee into ECX. The value in ecx is used as target of + // the tail jump. This is done to circumvent the ebx/callee-saved problem + // for tail calls on PIC/GOT architectures. Normally we would just put the + // address of GOT into ebx and then call target@PLT. But for tail calls + // ebx would be restored (since ebx is callee saved) before jumping to the + // target@PLT. + + // Note: The actual moving to ECX is done further down. + GlobalAddressSDNode *G = dyn_cast(Callee); + if (G && !G->getGlobal()->hasHiddenVisibility() && + !G->getGlobal()->hasProtectedVisibility()) + Callee = LowerGlobalAddress(Callee, DAG); + else if (isa(Callee)) + Callee = LowerExternalSymbol(Callee, DAG); + } } if (Is64Bit && isVarArg) { @@ -1825,14 +1901,50 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { // We should use extra load for direct calls to dllimported functions in // non-JIT mode. - if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), - getTargetMachine(), true)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(), - G->getOffset()); + GlobalValue *GV = G->getGlobal(); + if (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), true)) { + unsigned char OpFlags = 0; + + // On ELF targets, in both X86-64 and X86-32 mode, direct calls to + // external symbols most go through the PLT in PIC mode. If the symbol + // has hidden or protected visibility, or if it is static or local, then + // we don't need to use the PLT - we can directly call it. + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_ && + GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { + OpFlags = X86II::MO_PLT; + } else if (Subtarget->isPICStyleStub() && + (GV->isDeclaration() || GV->isWeakForLinker()) && + Subtarget->getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = X86II::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + G->getOffset(), OpFlags); + } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + unsigned char OpFlags = 0; + + // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external + // symbols should go through the PLT. + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + OpFlags = X86II::MO_PLT; + } else if (Subtarget->isPICStyleStub() && + Subtarget->getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = X86II::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), + OpFlags); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; + unsigned Opc = Is64Bit ? 
X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Opc, getPointerTy()), @@ -1869,9 +1981,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { RegsToPass[i].second.getValueType())); // Add an implicit use GOT pointer in EBX. - if (!IsTailCall && !Is64Bit && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) + if (!IsTailCall && Subtarget->isPICStyleGOT()) Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); // Add an implicit use of AL for x86 vararg functions. @@ -1984,22 +2094,11 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall, return false; if (CheckTailCallReturnConstraints(TheCall, Ret)) { - MachineFunction &MF = DAG.getMachineFunction(); - unsigned CallerCC = MF.getFunction()->getCallingConv(); - unsigned CalleeCC= TheCall->getCallingConv(); - if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { - SDValue Callee = TheCall->getCallee(); - // On x86/32Bit PIC/GOT tail calls are supported. - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ || - !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit()) - return true; - - // Can only do local tail calls (in same module, hidden or protected) on - // x86_64 PIC/GOT at the moment. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - return G->getGlobal()->hasHiddenVisibility() - || G->getGlobal()->hasProtectedVisibility(); - } + unsigned CallerCC = + DAG.getMachineFunction().getFunction()->getCallingConv(); + unsigned CalleeCC = TheCall->getCallingConv(); + if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) + return true; } return false; @@ -2149,186 +2248,164 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value falls within the specified range (L, H]. -static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { - if (Op.getOpcode() == ISD::UNDEF) - return true; - - unsigned Val = cast(Op)->getZExtValue(); - return (Val >= Low && Val < Hi); +/// isUndefOrInRange - Return true if Val is undef or if its value falls within +/// the specified range (L, H]. +static bool isUndefOrInRange(int Val, int Low, int Hi) { + return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value equal to the specified value. -static bool isUndefOrEqual(SDValue Op, unsigned Val) { - if (Op.getOpcode() == ISD::UNDEF) +/// isUndefOrEqual - Val is either less than zero (undef) or equal to the +/// specified value. +static bool isUndefOrEqual(int Val, int CmpVal) { + if (Val < 0 || Val == CmpVal) return true; - return cast(Op)->getZExtValue() == Val; + return false; } -/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFD. -bool X86::isPSHUFDMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 2 && N->getNumOperands() != 4) - return false; - - // Check if the value doesn't reference the second vector. 
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast(Arg)->getZExtValue() >= e) - return false; - } - - return true; +/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference +/// the second operand. +static bool isPSHUFDMask(const SmallVectorImpl &Mask, MVT VT) { + if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return (Mask[0] < 2 && Mask[1] < 2); + return false; } -/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFHW. -bool X86::isPSHUFHWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFDMask(M, N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFHW. +static bool isPSHUFHWMask(const SmallVectorImpl &Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - - // Lower quadword copied in order. - for (unsigned i = 0; i != 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast(Arg)->getZExtValue() != i) + + // Lower quadword copied in order or undef. + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - } - + // Upper quadword shuffled. - for (unsigned i = 4; i != 8; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < 4 || Val > 7) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; - } - + return true; } -/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFLW. -bool X86::isPSHUFLWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFHWMask(M, N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFLW. +static bool isPSHUFLWMask(const SmallVectorImpl &Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - + // Upper quadword copied in order. - for (unsigned i = 4; i != 8; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Lower quadword shuffled. - for (unsigned i = 0; i != 4; ++i) - if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 4) return false; - + return true; } +bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFLWMask(M, N->getValueType(0)); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. 
-template -static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { - if (NumElems != 2 && NumElems != 4) return false; - - unsigned Half = NumElems / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Elems[i], 0, NumElems)) +static bool isSHUFPMask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; - for (unsigned i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); +bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isSHUFPMask(M, N->getValueType(0)); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. -template -static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { - if (NumOps != 2 && NumOps != 4) return false; - - unsigned Half = NumOps / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) +static bool isCommutedSHUFPMask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - for (unsigned i = Half; i < NumOps; ++i) - if (!isUndefOrInRange(Ops[i], 0, NumOps)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; return true; } -static bool isCommutedSHUFP(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); +static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return isCommutedSHUFPMask(M, N->getValueType(0)); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 6) && - isUndefOrEqual(N->getOperand(1), 7) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); -} - -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 2) && - isUndefOrEqual(N->getOperand(1), 3) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); + return isUndefOrEqual(N->getMaskElt(0), 6) && + isUndefOrEqual(N->getMaskElt(1), 7) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) + if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; return true; @@ -2337,37 +2414,49 @@ bool X86::isMOVLPMask(SDNode *N) { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. -bool X86::isMOVHPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) { - SDValue Arg = N->getOperand(i + NumElems/2); - if (!isUndefOrEqual(Arg, i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems)) return false; - } return true; } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
-template -bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, +static bool isUNPCKLMask(const SmallVectorImpl &Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2378,26 +2467,26 @@ bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. -template -bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, +static bool isUNPCKHMask(const SmallVectorImpl &Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2408,270 +2497,177 @@ bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i+1); - + + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0)); +} + /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. 
vector_shuffle v, undef, /// <2, 2, 3, 3> -bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i + 1); - + + for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0)); +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -template -static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { - if (NumElts != 2 && NumElts != 4) +static bool isMOVLMask(const SmallVectorImpl &Mask, MVT VT) { + if (VT.getVectorElementType().getSizeInBits() < 32) return false; - if (!isUndefOrEqual(Elts[0], NumElts)) + int NumElts = VT.getVectorNumElements(); + + if (!isUndefOrEqual(Mask[0], NumElts)) return false; - - for (unsigned i = 1; i < NumElts; ++i) { - if (!isUndefOrEqual(Elts[i], i)) + + for (int i = 1; i < NumElts; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - } - + return true; } -bool X86::isMOVLMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isMOVLMask(N->op_begin(), N->getNumOperands()); +bool X86::isMOVLMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isMOVLMask(M, N->getValueType(0)); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. 
-template -static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, - bool V2IsSplat = false, - bool V2IsUndef = false) { +static bool isCommutedMOVLMask(const SmallVectorImpl &Mask, MVT VT, + bool V2IsSplat = false, bool V2IsUndef = false) { + int NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Ops[0], 0)) + + if (!isUndefOrEqual(Mask[0], 0)) return false; - - for (unsigned i = 1; i < NumOps; ++i) { - SDValue Arg = Ops[i]; - if (!(isUndefOrEqual(Arg, i+NumOps) || - (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) + + for (int i = 1; i < NumOps; ++i) + if (!(isUndefOrEqual(Mask[i], i+NumOps) || + (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) return false; - } - + return true; } -static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedMOVL(N->op_begin(), N->getNumOperands(), - V2IsSplat, V2IsUndef); + SmallVector M; + N->getMask(M); + return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 1, 1, 3, 3 for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 1) return false; + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 1) + return false; } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 3) return false; - HasHi = true; + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 3) + return false; + if (Elt == 3) + HasHi = true; } - // Don't use movshdup if it can be done with a shufps. + // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
-bool X86::isMOVSLDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 0) return false; - } + for (unsigned i = 0; i < 2; ++i) + if (N->getMaskElt(i) > 0) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 2) return false; - HasHi = true; - } - - // Don't use movshdup if it can be done with a shufps. - return HasHi; -} - -/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a identity operation on the LHS or RHS. -static bool isIdentityMask(SDNode *N, bool RHS = false) { - unsigned NumElems = N->getNumOperands(); - for (unsigned i = 0; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 2) return false; - return true; -} - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element. -static bool isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // This is a splat operation if each element of the permute is the same, and - // if the value doesn't reference the second vector. - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa(Elt)) { - ElementBase = Elt; - break; - } + if (Elt == 2) + HasHi = true; } - - if (!ElementBase.getNode()) - return false; - - for (; i != NumElems; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (Arg != ElementBase) return false; - } - - // Make sure it is a splat of the first vector operand. - return cast(ElementBase)->getZExtValue() < NumElems; -} - -/// getSplatMaskEltNo - Given a splat mask, return the index to the element -/// we want to splat. -static SDValue getSplatMaskEltNo(SDNode *N) { - assert(isSplatMask(N) && "Not a splat mask"); - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa(Elt)) - return Elt; - } - assert(0 && " No splat value found!"); - return SDValue(); -} - - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element and it's a 2 or 4 element mask. -bool X86::isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // We can only splat 64-bit, and 32-bit quantities with a single instruction. - if (N->getNumOperands() != 4 && N->getNumOperands() != 2) - return false; - return ::isSplatMask(N); -} - -/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a splat of zero element. 
-bool X86::isSplatLoMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), 0)) - return false; - return true; + // Don't use movsldup if it can be done with a shufps. + return HasHi; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned e = N->getNumOperands() / 2; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) +bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { + int e = N->getValueType(0).getVectorNumElements() / 2; + + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(e+i), i)) + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(e+i), i)) return false; return true; } @@ -2680,20 +2676,19 @@ bool X86::isMOVDDUPMask(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - unsigned NumOperands = N->getNumOperands(); + ShuffleVectorSDNode *SVOp = cast(N); + int NumOperands = SVOp->getValueType(0).getVectorNumElements(); + unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (unsigned i = 0; i < NumOperands; ++i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(NumOperands-i-1); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast(Arg)->getZExtValue(); + for (int i = 0; i < NumOperands; ++i) { + int Val = SVOp->getMaskElt(NumOperands-i-1); + if (Val < 0) Val = 0; if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } - return Mask; } @@ -2701,19 +2696,16 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - Val = cast(Arg)->getZExtValue(); + int Val = SVOp->getMaskElt(i); + if (Val >= 0) Mask |= (Val - 4); - } if (i != 4) Mask <<= 2; } - return Mask; } @@ -2721,90 +2713,67 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast(Arg)->getZExtValue(); - Mask |= Val; + int Val = SVOp->getMaskElt(i); + if (Val >= 0) + Mask |= Val; if (i != 0) Mask <<= 2; } - return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as -/// values in ther permute mask. 
-static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, - SDValue &V2, SDValue &Mask, - SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector MaskVec; - DebugLoc dl = Op.getDebugLoc(); - +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in +/// their permute mask. +static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; + for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); - continue; - } - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + int idx = SVOp->getMaskElt(i); + if (idx < 0) + MaskVec.push_back(idx); + else if (idx < (int)NumElems) + MaskVec.push_back(idx + NumElems); else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + MaskVec.push_back(idx - NumElems); } - - std::swap(V1, V2); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), + SVOp->getOperand(0), &MaskVec[0]); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static -SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector MaskVec; +static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, MVT VT) { + unsigned NumElems = VT.getVectorNumElements(); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); + int idx = Mask[i]; + if (idx < 0) continue; - } - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else if (idx < (int)NumElems) + Mask[i] = idx + NumElems; else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + Mask[i] = idx - NumElems; } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). 
-static bool ShouldXformToMOVHLPS(SDNode *Mask) { - unsigned NumElems = Mask->getNumOperands(); - if (NumElems != 4) +static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { + if (Op->getValueType(0).getVectorNumElements() != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+2)) + if (!isUndefOrEqual(Op->getMaskElt(i), i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+4)) + if (!isUndefOrEqual(Op->getMaskElt(i), i+4)) return false; return true; } @@ -2828,7 +2797,8 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, + ShuffleVectorSDNode *Op) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -2836,14 +2806,15 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Mask->getNumOperands(); + unsigned NumElems = Op->getValueType(0).getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i)) + if (!isUndefOrEqual(Op->getMaskElt(i), i)) return false; for (unsigned i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) + if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems)) return false; return true; } @@ -2861,29 +2832,6 @@ static bool isSplatVector(SDNode *N) { return true; } -/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an undef. -static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - - SDValue V1 = N->getOperand(0); - SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) - return false; - else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) - return false; - } - } - return true; -} - /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2894,34 +2842,25 @@ static inline bool isZeroNode(SDValue Elt) { } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -static bool isZeroShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - +/// to an zero vector. 
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode +static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); + unsigned NumElems = N->getValueType(0).getVectorNumElements(); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) - continue; - - unsigned Idx = cast(Arg)->getZExtValue(); - if (Idx < NumElems) { - unsigned Opc = V1.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + int Idx = N->getMaskElt(i); + if (Idx >= (int)NumElems) { + unsigned Opc = V2.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V1.getNode()->getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) return false; - } else if (Idx >= NumElems) { - unsigned Opc = V2.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + } else if (Idx >= 0) { + unsigned Opc = V1.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2969,169 +2908,92 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. -static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { - assert(Mask.getOpcode() == ISD::BUILD_VECTOR); - +static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + bool Changed = false; - SmallVector MaskVec; - unsigned NumElems = Mask.getNumOperands(); + SmallVector MaskVec; + SVOp->getMask(MaskVec); + for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast(Arg)->getZExtValue(); - if (Val > NumElems) { - Arg = DAG.getConstant(NumElems, Arg.getValueType()); - Changed = true; - } + if (MaskVec[i] > (int)NumElems) { + MaskVec[i] = NumElems; + Changed = true; } - MaskVec.push_back(Arg); } - if (Changed) - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), - Mask.getValueType(), - &MaskVec[0], MaskVec.size()); - return Mask; + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), + SVOp->getOperand(1), &MaskVec[0]); + return SDValue(SVOp, 0); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. 
-static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - - SmallVector MaskVec; - MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation -/// of specified width. -static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + Mask.push_back(i); + Mask.push_back(i + NumElems); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation -/// of specified width. -static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); +/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; - SmallVector MaskVec; + SmallVector Mask; for (unsigned i = 0; i != Half; ++i) { - MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); - } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps -/// element #0 of a vector with the specified index, leaving the rest of the -/// elements in place. -static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, - SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; - // Element #0 of the result gets the elt we are replacing. - MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); - for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + Mask.push_back(i + Half); + Mask.push_back(i + NumElems + Half); + } + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { - MVT PVT = HasSSE2 ? 
MVT::v4i32 : MVT::v4f32; - MVT VT = Op.getValueType(); - if (PVT == VT) - return Op; - SDValue V1 = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); - unsigned MaskNumElems = Mask.getNumOperands(); - unsigned NumElems = MaskNumElems; - DebugLoc dl = Op.getDebugLoc(); - // Special handling of v4f32 -> v4i32. - if (VT != MVT::v4f32) { - // Find which element we want to splat. - SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); - unsigned EltNo = cast(EltNoNode)->getZExtValue(); - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - Mask = getUnpacklMask(MaskNumElems, DAG, dl); - } else { - Mask = getUnpackhMask(MaskNumElems, DAG, dl); - EltNo -= NumElems/2; - } - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); - NumElems >>= 1; +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE2) { + if (SV->getValueType(0).getVectorNumElements() <= 4) + return SDValue(SV, 0); + + MVT PVT = MVT::v4f32; + MVT VT = SV->getValueType(0); + DebugLoc dl = SV->getDebugLoc(); + SDValue V1 = SV->getOperand(0); + int NumElems = VT.getVectorNumElements(); + int EltNo = SV->getSplatIndex(); + + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + V1 = getUnpackl(DAG, dl, VT, V1, V1); + } else { + V1 = getUnpackh(DAG, dl, VT, V1, V1); + EltNo -= NumElems/2; } - SDValue Cst = DAG.getConstant(EltNo, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - } - - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); -} - -/// isVectorLoad - Returns true if the node is a vector load, a scalar -/// load that's promoted to vector, or a load bitcasted. -static bool isVectorLoad(SDValue Op) { - assert(Op.getValueType().isVector() && "Expected a vector type"); - if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR || - Op.getOpcode() == ISD::BIT_CONVERT) { - return isa(Op.getOperand(0)); - } - return isa(Op); -} - - -/// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. -/// -static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, - SelectionDAG &DAG, bool HasSSE3) { - // If we have sse3 and shuffle has more than one use or input is a load, then - // use movddup. Otherwise, use movlhps. - bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); - MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; - MVT VT = Op.getValueType(); - if (VT == PVT) - return Op; - DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PVT.getVectorNumElements(); - if (NumElems == 2) { - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else { - assert(NumElems == 4); - SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); - SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst0, Cst1, Cst0, Cst1); + NumElems >>= 1; } - + + // Perform the splat. 
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3141,39 +3003,31 @@ static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); - unsigned NumElems = V2.getValueType().getVectorNumElements(); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; for (unsigned i = 0; i != NumElems; ++i) - if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(DAG.getConstant(NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(i == Idx ? NumElems : i); + return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, - unsigned NumElems, bool Low, - SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, + bool Low, SelectionDAG &DAG) { unsigned NumZeros = 0; - for (unsigned i = 0; i < NumElems; ++i) { + for (int i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - SDValue Idx = Mask.getOperand(Index); - if (Idx.getOpcode() == ISD::UNDEF) { + int Idx = SVOp->getMaskElt(Index); + if (Idx < 0) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); + SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -3184,40 +3038,39 @@ unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. -static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, +/// FIXME: split into pslldqi, psrldqi, palignr variants. 
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = Mask.getNumOperands(); + int NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; - unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG); if (!NumZeros) return false; } - bool SeenV1 = false; bool SeenV2 = false; - for (unsigned i = NumZeros; i < NumElems; ++i) { - unsigned Val = isLeft ? (i - NumZeros) : i; - SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); - if (Idx.getOpcode() == ISD::UNDEF) + for (int i = NumZeros; i < NumElems; ++i) { + int Val = isLeft ? (i - NumZeros) : i; + int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx < 0) continue; - unsigned Index = cast(Idx)->getZExtValue(); - if (Index < NumElems) + if (Idx < NumElems) SeenV1 = true; else { - Index -= NumElems; + Idx -= NumElems; SeenV2 = true; } - if (Index != Val) + if (Idx != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); + ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); ShAmt = NumZeros; return true; } @@ -3302,8 +3155,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; @@ -3362,7 +3215,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } // Special case for single non-zero, non-undef, element. - if (NumNonZero == 1 && NumElems <= 4) { + if (NumNonZero == 1) { unsigned Idx = CountTrailingZeros_32(NonZeros); SDValue Item = Op.getOperand(Idx); @@ -3388,11 +3241,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SDValue Ops[] = { - Item, DAG.getUNDEF(Item.getValueType()), - getSwapEltZeroMask(VecElts, Idx, DAG, dl) - }; - Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); + SmallVector Mask; + Mask.push_back(Idx); + for (unsigned i = 1; i != VecElts; ++i) + Mask.push_back(i); + Item = DAG.getVectorShuffle(VecVT, dl, Item, + DAG.getUNDEF(Item.getValueType()), + &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3401,15 +3256,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // If we have a constant or non-constant insertion into the low element of // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into // the rest of the elements. This will be matched as movd/movq/movss/movsd - // depending on what the source datatype is. Because we can only get here - // when NumElems <= 4, this only needs to handle i32/f32/i64/f64. - if (Idx == 0 && - // Don't do this for i64 values on x86-32. 
- (EVT != MVT::i64 || Subtarget->is64Bit())) { - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. - return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasSSE2(), DAG); + // depending on what the source datatype is. + if (Idx == 0) { + if (NumZero == 0) { + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 || + (EVT == MVT::i64 && Subtarget->is64Bit())) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. + return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), + DAG); + } else if (EVT == MVT::i16 || EVT == MVT::i8) { + Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); + MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, + Subtarget->hasSSE2(), DAG); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item); + } } // Is it a vector logical left shift? @@ -3436,15 +3300,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT MaskEVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, - DAG.getUNDEF(VT), Mask); + MaskVec.push_back(i == Idx ? 0 : 1); + return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); } } @@ -3502,54 +3361,53 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { V[i] = V[i*2]; // Must be a zero vector. break; case 1: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); break; case 2: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); break; case 3: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getUnpacklMask(NumElems, DAG, dl)); + V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); break; } } - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + SmallVector MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); + MaskVec.push_back(Reverse ? 1-i : i); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); + MaskVec.push_back(Reverse ? 
1-i+NumElems : i+NumElems); + return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } if (Values.size() > 2) { + // If we have SSE 4.1, Expand into a number of inserts unless the number of + // values to be inserted is equal to the number of elements, in which case + // use the unpack code below in the hopes of matching the consecutive elts + // load merge pattern for shuffles. + // FIXME: We could probably just check that here directly. + if (Values.size() < NumElems && VT.getSizeInBits() == 128 && + getSubtarget()->hasSSE41()) { + V[0] = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NumElems; ++i) + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + Op.getOperand(i), DAG.getIntPtrConstant(i)); + return V[0]; + } // Expand into a number of unpckl*. // e.g. for v4f32 // Step 1: unpcklps 0, 2 ==> X: // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], - UnpckMask); + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); NumElems >>= 1; } return V[0]; @@ -3564,11 +3422,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); SmallVector MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3579,9 +3437,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl &Quad = i < 4 ? LoQuad : HiQuad; - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast(Elt)->getZExtValue(); + int EltIdx = SVOp->getMaskElt(i); MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3634,14 +3490,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // words from all 4 input quadwords. SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector MaskV; - MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); - MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); - - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); + SmallVector MaskV; + MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); + MaskV.push_back(BestHiQuad < 0 ? 
1 : BestHiQuad); + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3679,15 +3533,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - MaskV.clear(); - for (unsigned i = 0; i != 8; ++i) - MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) - : DAG.getConstant(MaskVals[i], - MVT::i16)); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, - &MaskV[0], 8)); + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } @@ -3744,49 +3591,45 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // and update MaskVals with new element order. BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); + MaskV.push_back(idx & 3); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + MaskV.push_back(i); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); + MaskV.push_back(i); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); + MaskV.push_back((idx & 3) + 4); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3822,12 +3665,13 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // 2. [ssse3] 2 x pshufb + 1 x por // 3. 
[all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); SmallVector MaskVals; + SVOp->getMask(MaskVals); // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is @@ -3836,10 +3680,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast(Elt)->getZExtValue(); - MaskVals.push_back(EltIdx); + int EltIdx = MaskVals[i]; if (EltIdx < 0) continue; if (EltIdx < 16) @@ -3969,11 +3810,13 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, - MVT VT, - SDValue PermMask, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - unsigned NumElems = PermMask.getNumOperands(); +SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + MVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3992,38 +3835,35 @@ SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, else NewVT = MVT::v2f64; } - unsigned Scale = NumElems / NewWidth; - SmallVector MaskVec; + int Scale = NumElems / NewWidth; + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - unsigned StartIdx = ~0U; - for (unsigned j = 0; j < Scale; ++j) { - SDValue Elt = PermMask.getOperand(i+j); - if (Elt.getOpcode() == ISD::UNDEF) + int StartIdx = -1; + for (int j = 0; j < Scale; ++j) { + int EltIdx = SVOp->getMaskElt(i+j); + if (EltIdx < 0) continue; - unsigned EltIdx = cast(Elt)->getZExtValue(); - if (StartIdx == ~0U) + if (StartIdx == -1) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == ~0U) - MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); + if (StartIdx == -1) + MaskVec.push_back(-1); else - MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); + MaskVec.push_back(StartIdx / Scale); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size())); + return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } /// getVZextMovL - Return a zero-extending vector move low node. 
/// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -4057,31 +3897,34 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. static SDValue -LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, - SDValue PermMask, MVT VT, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = PermMask.getValueType(); - MVT MaskEVT = MaskVT.getVectorElementType(); +LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + MVT VT = SVOp->getValueType(0); + SmallVector, 8> Locs; Locs.resize(4); - SmallVector Mask1(4, DAG.getUNDEF(MaskEVT)); + SmallVector Mask1(4U, -1); + SmallVector PermMask; + SVOp->getMask(PermMask); + unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); } else { - unsigned Val = cast(Elt)->getZExtValue(); - assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Val < 4) { + assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Idx < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Elt; + Mask1[NumLo] = Idx; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Elt; + Mask1[2+NumHi] = Idx; NumHi++; } } @@ -4092,24 +3935,21 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask1[0], Mask1.size())); + V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - SmallVector Mask2(4, DAG.getUNDEF(MaskEVT)); + SmallVector Mask2(4U, -1); + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = DAG.getConstant(Idx, MaskEVT); + Mask2[i] = Idx; } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask2[0], Mask2.size())); + return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -4120,60 +3960,51 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); + CommuteVectorShuffleMask(PermMask, VT); std::swap(V1, V2); } // Find the element from V2. 
unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - SDValue Elt = PermMask.getOperand(HiIndex); - if (Elt.getOpcode() == ISD::UNDEF) + int Val = PermMask[HiIndex]; + if (Val < 0) continue; - unsigned Val = cast(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask.getOperand(HiIndex); - Mask1[1] = DAG.getUNDEF(MaskEVT); - Mask1[2] = PermMask.getOperand(HiIndex^1); - Mask1[3] = DAG.getUNDEF(MaskEVT); - V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[HiIndex]; + Mask1[1] = -1; + Mask1[2] = PermMask[HiIndex^1]; + Mask1[3] = -1; + V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); if (HiIndex >= 2) { - Mask1[0] = PermMask.getOperand(0); - Mask1[1] = PermMask.getOperand(1); - Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); - Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[0]; + Mask1[1] = PermMask[1]; + Mask1[2] = HiIndex & 1 ? 6 : 4; + Mask1[3] = HiIndex & 1 ? 4 : 6; + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); } else { - Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); - Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); - Mask1[2] = PermMask.getOperand(2); - Mask1[3] = PermMask.getOperand(3); - if (Mask1[2].getOpcode() != ISD::UNDEF) - Mask1[2] = - DAG.getConstant(cast(Mask1[2])->getZExtValue()+4, - MaskEVT); - if (Mask1[3].getOpcode() != ISD::UNDEF) - Mask1[3] = - DAG.getConstant(cast(Mask1[3])->getZExtValue()+4, - MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = HiIndex & 1 ? 2 : 0; + Mask1[1] = HiIndex & 1 ? 0 : 2; + Mask1[2] = PermMask[2]; + Mask1[3] = PermMask[3]; + if (Mask1[2] >= 0) + Mask1[2] += 4; + if (Mask1[3] >= 0) + Mask1[3] += 4; + return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } } // Break it into (shuffle shuffle_hi, shuffle_lo). 
Locs.clear(); - SmallVector LoMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector HiMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector *MaskPtr = &LoMask; + SmallVector LoMask(4U, -1); + SmallVector HiMask(4U, -1); + + SmallVector *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -4184,84 +4015,62 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, LoIdx = 0; HiIdx = 2; } - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); - } else if (cast(Elt)->getZExtValue() < 4) { + } else if (Idx < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Elt; + (*MaskPtr)[LoIdx] = Idx; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Elt; + (*MaskPtr)[HiIdx] = Idx; HiIdx++; } } - SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &LoMask[0], LoMask.size())); - SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &HiMask[0], HiMask.size())); - SmallVector MaskOps; + SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); + SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); + SmallVector MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(DAG.getUNDEF(MaskEVT)); + MaskOps.push_back(-1); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); + MaskOps.push_back(Idx); } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskOps[0], MaskOps.size())); + return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PermMask.getNumOperands(); + unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - // FIXME: Check for legal shuffle and return? - - if (isUndefShuffle(Op.getNode())) - return DAG.getUNDEF(VT); - - if (isZeroShuffle(Op.getNode())) + if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - if (isIdentityMask(PermMask.getNode())) - return V1; - else if (isIdentityMask(PermMask.getNode(), true)) - return V2; - - // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && - VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(PermMask.getNode())) - return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); - - if (isSplatMask(PermMask.getNode())) { - if (isMMX || NumElems < 4) return Op; - // Promote it to a v4{if}32 splat. - return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); + // Promote splats to v4f32. + if (SVOp->isSplat()) { + if (isMMX || NumElems < 4) + return Op; + return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! 
if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, - *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -4269,32 +4078,29 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) { - SDValue NewV1 = NewOp.getOperand(0); - SDValue NewV2 = NewOp.getOperand(1); - SDValue NewMask = NewOp.getOperand(2); - if (isCommutedMOVL(NewMask.getNode(), true, false)) { - NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); - return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, - dl); - } + if (isCommutedMOVL(cast(NewOp), true, false)) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(cast(NewOp))) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, Subtarget, dl); } } - + + if (X86::isPSHUFDMask(SVOp)) + return Op; + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasSSE2() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4302,8 +4108,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(PermMask.getNode())) { + + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4311,17 +4117,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - - if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || - X86::isMOVSLDUPMask(PermMask.getNode()) || - X86::isMOVHLPSMask(PermMask.getNode()) || - X86::isMOVHPMask(PermMask.getNode()) || - X86::isMOVLPMask(PermMask.getNode()))) + + // FIXME: fold these into legal mask. + if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || + X86::isMOVSLDUPMask(SVOp) || + X86::isMOVHLPSMask(SVOp) || + X86::isMOVHPMask(SVOp) || + X86::isMOVLPMask(SVOp))) return Op; - if (ShouldXformToMOVHLPS(PermMask.getNode()) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) - return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (ShouldXformToMOVHLPS(SVOp) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) + return CommuteVectorShuffle(SVOp, DAG); if (isShift) { // No better options. Use a vshl / vsrl. 
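The hunks on either side of this point all apply the same transformation: the shuffle mask is no longer a separate BUILD_VECTOR operand of ConstantSDNodes, it is an array of ints carried by the ShuffleVectorSDNode itself, with -1 marking an undef lane, and DAG.getVectorShuffle() builds the node straight from that array. A minimal sketch of the new idiom, assuming only a SelectionDAG, a DebugLoc and two 4-wide vector operands are in scope (the helper name is illustrative, not part of this patch):

// Illustrative only: build a movlhps-style shuffle <0,1,4,5> with the
// int-mask interface used throughout this patch.  Mask values 0..NumElems-1
// select lanes of V1, NumElems..2*NumElems-1 select lanes of V2, and a
// mask value of -1 leaves the lane undefined.
static SDValue getMOVLHPSLike(SelectionDAG &DAG, DebugLoc dl,
                              SDValue V1, SDValue V2) {
  MVT VT = V1.getValueType();            // e.g. MVT::v4f32
  assert(VT.getVectorNumElements() == 4 && "sketch assumes 4-wide vectors");
  SmallVector<int, 4> Mask;
  Mask.push_back(0);                     // lane 0 of V1
  Mask.push_back(1);                     // lane 1 of V1
  Mask.push_back(4);                     // lane 0 of V2
  Mask.push_back(5);                     // lane 1 of V2
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}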
@@ -4329,7 +4136,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4338,115 +4145,86 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Canonicalize the splat or undef, if present, to be on the RHS. if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + Op = CommuteVectorShuffle(SVOp, DAG); + SVOp = cast(Op); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - // FIXME: Figure out a cleaner way to do this. - if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { - if (V2IsUndef) return V1; - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (V2IsSplat) { - // V2 is a splat, so the mask may be malformed. That is, it may point - // to any V2 element. The instruction selectior won't like this. Get - // a corrected mask and commute to form a proper MOVS{S|D}. - SDValue NewMask = getMOVLMask(NumElems, DAG, dl); - if (NewMask.getNode() != PermMask.getNode()) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } - return Op; + if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + // Shuffling low element of v1 into undef, just return v1. + if (V2IsUndef) + return V1; + // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which + // the instruction selector will not match, so get a canonical MOVL with + // swapped operands to undo the commute. + return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) + if (X86::isUNPCKL_v_undef_Mask(SVOp) || + X86::isUNPCKH_v_undef_Mask(SVOp) || + X86::isUNPCKLMask(SVOp) || + X86::isUNPCKHMask(SVOp)) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. If match, return a // new vector_shuffle with the corrected mask. - SDValue NewMask = NormalizeMask(PermMask, DAG); - if (NewMask.getNode() != PermMask.getNode()) { - if (X86::isUNPCKLMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + SDValue NewMask = NormalizeMask(SVOp, DAG); + ShuffleVectorSDNode *NSVOp = cast(NewMask); + if (NSVOp != SVOp) { + if (X86::isUNPCKLMask(NSVOp, true)) { + return NewMask; + } else if (X86::isUNPCKHMask(NSVOp, true)) { + return NewMask; } } } - // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (Commuted) { // Commute is back and try unpck* again. - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) - return Op; + // FIXME: this seems wrong. 
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); + ShuffleVectorSDNode *NewSVOp = cast(NewOp); + if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || + X86::isUNPCKH_v_undef_Mask(NewSVOp) || + X86::isUNPCKLMask(NewSVOp) || + X86::isUNPCKHMask(NewSVOp)) + return NewOp; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Try PSHUF* first, then SHUFP*. - // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically - // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. - if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, - DAG.getUNDEF(VT), PermMask); - return Op; - } - - if (!isMMX) { - if (Subtarget->hasSSE2() && - (X86::isPSHUFDMask(PermMask.getNode()) || - X86::isPSHUFHWMask(PermMask.getNode()) || - X86::isPSHUFLWMask(PermMask.getNode()))) { - MVT RVT = VT; - if (VT == MVT::v4f32) { - RVT = MVT::v4i32; - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, - DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), - DAG.getUNDEF(RVT), PermMask); - } else if (V2.getOpcode() != ISD::UNDEF) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, - DAG.getUNDEF(RVT), PermMask); - if (RVT != VT) - Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); - return Op; - } - // Binary or unary shufps. - if (X86::isSHUFPMask(PermMask.getNode()) || - (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) - return Op; - } + // Normalize the node to match x86 shuffle ops if needed + if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + return CommuteVectorShuffle(SVOp, DAG); + // Check for legal shuffle and return? + SmallVector PermMask; + SVOp->getMask(PermMask); + if (isShuffleMaskLegal(PermMask, VT)) + return Op; + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); + return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); } @@ -4540,22 +4318,12 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; + // SHUFPS the element to the lowest double word, then movss. - MVT MaskVT = MVT::getIntVectorWithNumElements(4); - SmallVector IdxVec; - IdxVec. - push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. 
- push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), Mask); + int Mask[4] = { Idx, -1, -1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4569,17 +4337,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - MVT MaskVT = MVT::getIntVectorWithNumElements(2); - SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), - Mask); + int Mask[2] = { 1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -4643,7 +4404,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (EVT.getSizeInBits() == 16) { + if (EVT.getSizeInBits() == 16 && isa(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. if (N1.getValueType() != MVT::i32) @@ -4686,21 +4447,104 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { ConstantPoolSDNode *CP = cast(Op); - // FIXME there isn't really any debug info here, should come from the parent - DebugLoc dl = CP->getDebugLoc(); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + + if (Subtarget->is64Bit() && + getTargetMachine().getCodeModel() == CodeModel::Small) { + WrapperKind = X86ISD::WrapperRIP; + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + } else if (Subtarget->isPICStyleStub() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + OpFlag = X86II::MO_PIC_BASE_OFFSET; + } + SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), - CP->getAlignment()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + CP->getAlignment(), + CP->getOffset(), OpFlag); + DebugLoc DL = CP->getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + // With PIC, the address is actually $g + Offset. 
+ if (OpFlag) { + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), getPointerTy()), + Result); + } + + return Result; +} + +SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + JumpTableSDNode *JT = cast(Op); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + + if (Subtarget->is64Bit()) { + WrapperKind = X86ISD::WrapperRIP; + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + } else if (Subtarget->isPICStyleStub() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + OpFlag = X86II::MO_PIC_BASE_OFFSET; + } + + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + OpFlag); + DebugLoc DL = JT->getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC, the address is actually $g + Offset. + if (OpFlag) { + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), getPointerTy()), + Result); + } + + return Result; +} + +SDValue +X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { + const char *Sym = cast(Op)->getSymbol(); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + if (Subtarget->is64Bit()) { + WrapperKind = X86ISD::WrapperRIP; + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + } else if (Subtarget->isPICStyleStub() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + OpFlag = X86II::MO_PIC_BASE_OFFSET; + } + + SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); + + DebugLoc DL = Op.getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), + !Subtarget->is64Bit()) { + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } @@ -4708,22 +4552,62 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const { - bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; bool ExtraLoadRequired = Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false); // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. SDValue Result; - if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) { + if (!IsPIC && !ExtraLoadRequired && isInt32(Offset)) { + // A direct static reference to a global. 
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); Offset = 0; - } else - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + } else { + unsigned char OpFlags = 0; + + if (GV->hasDLLImportLinkage()) + OpFlags = X86II::MO_DLLIMPORT; + else if (Subtarget->isPICStyleRIPRel()) { + if (ExtraLoadRequired) + OpFlags = X86II::MO_GOTPCREL; + } else if (Subtarget->isPICStyleGOT()) { + if (ExtraLoadRequired) + OpFlags = X86II::MO_GOT; + else + OpFlags = X86II::MO_GOTOFF; + } else if (Subtarget->isPICStyleStub()) { + // In darwin, we have multiple different stub types, and we have both PIC + // and -mdynamic-no-pic. Determine whether we have a stub reference + // and/or whether the reference is relative to the PIC base or not. + + // Link-once, declaration, or Weakly-linked global variables need + // non-lazily-resolved stubs. + if (!ExtraLoadRequired) { + // Not a stub reference. + OpFlags = IsPIC ? X86II::MO_PIC_BASE_OFFSET : 0; + } else if (!GV->hasHiddenVisibility()) { + // Non-hidden $non_lazy_ptr reference. + OpFlags = IsPIC ? X86II::MO_DARWIN_NONLAZY_PIC_BASE : + X86II::MO_DARWIN_NONLAZY; + } else { + // Hidden $non_lazy_ptr reference. + OpFlags = IsPIC ? X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE : + X86II::MO_DARWIN_HIDDEN_NONLAZY; + } + } + + Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + } + + if (Subtarget->is64Bit() && + getTargetMachine().getCodeModel() == CodeModel::Small) + Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + else + Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (IsPic && !Subtarget->isPICStyleRIPRel()) { + if (IsPIC && !Subtarget->is64Bit()) { Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), Result); @@ -4756,19 +4640,23 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag) { + SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags) { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - GA->getOffset()); + GA->getOffset(), + OperandFlags); if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - return DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3); } else { SDValue Ops[] = { Chain, TGA }; - return DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); } + SDValue Flag = Chain.getValue(1); + return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit @@ -4783,42 +4671,15 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, PtrVT), InFlag); InFlag = Chain.getValue(1); - Chain = GetTLSADDR(DAG, Chain, GA, &InFlag); - InFlag = Chain.getValue(1); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops1[] = { Chain, - DAG.getTargetExternalSymbol("___tls_get_addr", - PtrVT), - DAG.getRegister(X86::EAX, PtrVT), - DAG.getRegister(X86::EBX, PtrVT), - InFlag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops1, 5); - InFlag = Chain.getValue(1); - - return DAG.getCopyFromReg(Chain, dl, X86::EAX, PtrVT, 
InFlag); + return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const MVT PtrVT) { - SDValue InFlag, Chain; - DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better - - Chain = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL); - InFlag = Chain.getValue(1); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops1[] = { Chain, - DAG.getTargetExternalSymbol("__tls_get_addr", - PtrVT), - DAG.getRegister(X86::RDI, PtrVT), - InFlag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops1, 4); - InFlag = Chain.getValue(1); - - return DAG.getCopyFromReg(Chain, dl, X86::RAX, PtrVT, InFlag); + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::RAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or @@ -4836,12 +4697,26 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, NULL, 0); + unsigned char OperandFlags = 0; + // Most TLS accesses are not RIP relative, even on x86-64. One exception is + // initialexec. + unsigned WrapperKind = X86ISD::Wrapper; + if (model == TLSModel::LocalExec) { + OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; + } else if (is64Bit) { + assert(model == TLSModel::InitialExec); + OperandFlags = X86II::MO_GOTTPOFF; + WrapperKind = X86ISD::WrapperRIP; + } else { + assert(model == TLSModel::InitialExec); + OperandFlags = X86II::MO_INDNTPOFF; + } + // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), - GA->getValueType(0), - GA->getOffset()); - SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + GA->getOffset(), OperandFlags); + SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, @@ -4859,72 +4734,33 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast(Op); - GlobalValue *GV = GA->getGlobal(); - TLSModel::Model model = - getTLSModel (GV, getTargetMachine().getRelocationModel()); - if (Subtarget->is64Bit()) { - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented + const GlobalValue *GV = GA->getGlobal(); + + // If GV is an alias then use the aliasee for determining + // thread-localness. 
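+  // (Editor's illustration, assuming IR along the lines of
+  //    @tls_var   = thread_local global i32 0
+  //    @tls_alias = alias i32* @tls_var
+  //  an access through @tls_alias must be lowered with the TLS model chosen
+  //  for @tls_var, which is why the alias is resolved first.)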
+ if (const GlobalAlias *GA = dyn_cast(GV)) + GV = GA->resolveAliasedGlobal(false); + + TLSModel::Model model = getTLSModel(GV, + getTargetMachine().getRelocationModel()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: // not implemented + if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true); - } - } else { - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false); - } + return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); } + assert(0 && "Unreachable"); return SDValue(); } -SDValue -X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - const char *Sym = cast(Op)->getSymbol(); - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - Result); - } - - return Result; -} - -SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { - JumpTableSDNode *JT = cast(Op); - // FIXME there isn't really any debug into here - DebugLoc dl = JT->getDebugLoc(); - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - Result); - } - - return Result; -} /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and /// take a 2 x i32 value to shift plus a shift amount. @@ -4975,15 +4811,25 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getValueType(); + + if (SrcVT.isVector()) { + if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { + return Op; + } + return SDValue(); + } + assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); - // These are really Legal; caller falls through into that case. + // These are really Legal; return the operand so the caller accepts it as + // Legal. 
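+  // (Editor's note, illustrative: with SSE these legal cases select directly
+  // to cvtsi2ss/cvtsi2sd, e.g. "cvtsi2sdl %eax, %xmm0" for i32->f64 or
+  // "cvtsi2sdq %rax, %xmm0" for i64->f64 on x86-64, so no custom DAG is
+  // needed here and returning Op simply reports the node as Legal.)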
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) - return SDValue(); - if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && - Subtarget->is64Bit()) - return SDValue(); + return Op; + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && + Subtarget->is64Bit()) { + return Op; + } DebugLoc dl = Op.getDebugLoc(); unsigned Size = SrcVT.getSizeInBits()/8; @@ -4993,8 +4839,14 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, PseudoSourceValue::getFixedStack(SSFI), 0); + return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); +} +SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, + SDValue StackSlot, + SelectionDAG &DAG) { // Build the FILD + DebugLoc dl = Op.getDebugLoc(); SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) @@ -5086,19 +4938,6 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); - SmallVector MaskVec; - MaskVec.push_back(DAG.getConstant(0, MVT::i32)); - MaskVec.push_back(DAG.getConstant(4, MVT::i32)); - MaskVec.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec.push_back(DAG.getConstant(5, MVT::i32)); - SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &MaskVec[0], MaskVec.size()); - SmallVector MaskVec2; - MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, - &MaskVec2[0], MaskVec2.size()); - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -5107,13 +4946,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - XR1, XR2, UnpcklMask); + SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - Unpck1, CLod0, UnpcklMask); + SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -5121,8 +4958,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, - Sub, Sub, ShufMask); + int ShufMask[2] = { 1, -1 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, + DAG.getUNDEF(MVT::v2f64), ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -5186,43 +5024,62 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { MVT SrcVT = N0.getValueType(); if (SrcVT == MVT::i64) { - // We only handle SSE2 f64 target here; caller can handle the rest. 
+ // We only handle SSE2 f64 target here; caller can expand the rest. if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) return SDValue(); return LowerUINT_TO_FP_i64(Op, DAG); - } else if (SrcVT == MVT::i32) { + } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { return LowerUINT_TO_FP_i32(Op, DAG); } - assert(0 && "Unknown UINT_TO_FP to lower!"); - return SDValue(); + assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!"); + + // Make a 64-bit buffer, and use it to build an FILD. + SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); + SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, + getPointerTy(), StackSlot, WordOff); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + StackSlot, NULL, 0); + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + OffsetSlot, NULL, 0); + return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } std::pair X86TargetLowering:: -FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { +FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { DebugLoc dl = Op.getDebugLoc(); - assert(Op.getValueType().getSimpleVT() <= MVT::i64 && - Op.getValueType().getSimpleVT() >= MVT::i16 && + + MVT DstTy = Op.getValueType(); + + if (!IsSigned) { + assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); + DstTy = MVT::i64; + } + + assert(DstTy.getSimpleVT() <= MVT::i64 && + DstTy.getSimpleVT() >= MVT::i16 && "Unknown FP_TO_SINT to lower!"); // These are really Legal. - if (Op.getValueType() == MVT::i32 && + if (DstTy == MVT::i32 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); if (Subtarget->is64Bit() && - Op.getValueType() == MVT::i64 && - Op.getOperand(0).getValueType() != MVT::f80) + DstTy == MVT::i64 && + isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary // stack slot. 
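   // (Editor's sketch of the unsigned case: an f64->u32 FP_TO_UINT is done as
   // a *signed* f64->i64 FIST into the slot, and only the low 32 bits are
   // reloaded afterwards, e.g.
   //   fistpll -8(%ebp)
   //   movl    -8(%ebp), %eax
   // which is correct because every u32 value is representable in an i64.)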
MachineFunction &MF = DAG.getMachineFunction(); - unsigned MemSize = Op.getValueType().getSizeInBits()/8; + unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + unsigned Opc; - switch (Op.getValueType().getSimpleVT()) { + switch (DstTy.getSimpleVT()) { default: assert(0 && "Invalid FP_TO_SINT to lower!"); case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; @@ -5232,7 +5089,7 @@ FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { - assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); + assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, PseudoSourceValue::getFixedStack(SSFI), 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); @@ -5253,9 +5110,28 @@ FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { - std::pair Vals = FP_TO_SINTHelper(Op, DAG); + if (Op.getValueType().isVector()) { + if (Op.getValueType() == MVT::v2i32 && + Op.getOperand(0).getValueType() == MVT::v2f64) { + return Op; + } + return SDValue(); + } + + std::pair Vals = FP_TO_INTHelper(Op, DAG, true); + SDValue FIST = Vals.first, StackSlot = Vals.second; + // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. + if (FIST.getNode() == 0) return Op; + + // Load the result. + return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), + FIST, StackSlot, NULL, 0); +} + +SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { + std::pair Vals = FP_TO_INTHelper(Op, DAG, false); SDValue FIST = Vals.first, StackSlot = Vals.second; - if (FIST.getNode() == 0) return SDValue(); + assert(FIST.getNode() && "Unexpected failure"); // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), @@ -5971,7 +5847,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); std::pair CallResult = LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -6212,8 +6088,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - assert(0 && "VAArgInst is not yet implemented for x86-64!"); - abort(); + llvm_report_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } @@ -6414,7 +6289,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: abort(); // Can't reach here. + default: LLVM_UNREACHABLE("Impossible intrinsic"); // Can't reach here. 
} break; } @@ -6586,8 +6461,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - cerr << "Nest register in use - reduce number of inreg parameters!\n"; - abort(); + llvm_report_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -6954,6 +6828,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); + case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -7025,7 +6900,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, assert(false && "Do not know how to custom type legalize this operation!"); return; case ISD::FP_TO_SINT: { - std::pair Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG); + std::pair Vals = + FP_TO_INTHelper(SDValue(N, 0), DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode() != 0) { MVT VT = N->getValueType(0); @@ -7139,6 +7015,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; + case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; @@ -7269,39 +7146,48 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); } +bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { + // i16 instructions are longer (0x66 prefix) and potentially slower. + return !(VT1 == MVT::i32 && VT2 == MVT::i16); +} + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + MVT VT) const { // Only do shuffles on 128-bit vector types for now. - // FIXME: pshufb, blends - if (VT.getSizeInBits() == 64) return false; - return (Mask.getNode()->getNumOperands() <= 4 || - isIdentityMask(Mask.getNode()) || - isIdentityMask(Mask.getNode(), true) || - isSplatMask(Mask.getNode()) || - X86::isPSHUFHWMask(Mask.getNode()) || - X86::isPSHUFLWMask(Mask.getNode()) || - X86::isUNPCKLMask(Mask.getNode()) || - X86::isUNPCKHMask(Mask.getNode()) || - X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || - X86::isUNPCKH_v_undef_Mask(Mask.getNode())); + if (VT.getSizeInBits() == 64) + return false; + + // FIXME: pshufb, blends, palignr, shifts. 
+  return (VT.getVectorNumElements() == 2 ||
+          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+          isMOVLMask(M, VT) ||
+          isSHUFPMask(M, VT) ||
+          isPSHUFDMask(M, VT) ||
+          isPSHUFHWMask(M, VT) ||
+          isPSHUFLWMask(M, VT) ||
+          isUNPCKLMask(M, VT) ||
+          isUNPCKHMask(M, VT) ||
+          isUNPCKL_v_undef_Mask(M, VT) ||
+          isUNPCKH_v_undef_Mask(M, VT));
 }
 
 bool
-X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
-                                          MVT EVT, SelectionDAG &DAG) const {
-  unsigned NumElts = BVOps.size();
-  // Only do shuffles on 128-bit vector types for now.
-  if (EVT.getSizeInBits() * NumElts == 64) return false;
-  if (NumElts == 2) return true;
-  if (NumElts == 4) {
-    return (isMOVLMask(&BVOps[0], 4)  ||
-            isCommutedMOVL(&BVOps[0], 4, true) ||
-            isSHUFPMask(&BVOps[0], 4) ||
-            isCommutedSHUFP(&BVOps[0], 4));
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+                                          MVT VT) const {
+  unsigned NumElts = VT.getVectorNumElements();
+  // FIXME: This collection of masks seems suspect.
+  if (NumElts == 2)
+    return true;
+  if (NumElts == 4 && VT.getSizeInBits() == 128) {
+    return (isMOVLMask(Mask, VT)  ||
+            isCommutedMOVLMask(Mask, VT, true) ||
+            isSHUFPMask(Mask, VT) ||
+            isCommutedSHUFPMask(Mask, VT));
   }
   return false;
 }
@@ -7357,7 +7243,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
 
   // Insert instructions into newMBB based on incoming instruction
   assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
-	 "unexpected number of operands");
+         "unexpected number of operands");
   DebugLoc dl = bInstr->getDebugLoc();
   MachineOperand& destOper = bInstr->getOperand(0);
   MachineOperand* argOpers[2 + X86AddrNumOperands];
@@ -7467,7 +7353,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   // Insert instructions into newMBB based on incoming instruction
   // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
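   // (Editor's note, illustrative: the expansion below builds a loop that
   // keeps the current 64-bit value in EDX:EAX, computes the desired value
   // into ECX:EBX, and retries "lock cmpxchg8b" until the exchange succeeds.)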
assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && - "unexpected number of operands"); + "unexpected number of operands"); MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); MachineOperand* argOpers[2 + X86AddrNumOperands]; @@ -7514,7 +7400,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, int valArgIndx = lastAddrIndx + 1; assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && + argOpers[valArgIndx]->isImm()) && "invalid operand"); unsigned t5 = F->getRegInfo().createVirtualRegister(RC); unsigned t6 = F->getRegInfo().createVirtualRegister(RC); @@ -7526,9 +7412,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt1); (*MIB).addOperand(*argOpers[valArgIndx]); assert(argOpers[valArgIndx + 1]->isReg() == - argOpers[valArgIndx]->isReg()); + argOpers[valArgIndx]->isReg()); assert(argOpers[valArgIndx + 1]->isImm() == - argOpers[valArgIndx]->isImm()); + argOpers[valArgIndx]->isImm()); if (argOpers[valArgIndx + 1]->isReg()) MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6); else @@ -7609,7 +7495,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, DebugLoc dl = mInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && - "unexpected number of operands"); + "unexpected number of operands"); MachineOperand& destOper = mInstr->getOperand(0); MachineOperand* argOpers[2 + X86AddrNumOperands]; int numArgs = mInstr->getNumOperands() - 1; @@ -8036,16 +7922,16 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, - unsigned NumElems, MVT EVT, - SDNode *&Base, +static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, + MVT EVT, LoadSDNode *&LDBase, + unsigned &LastLoadedElt, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { - Base = NULL; + LDBase = NULL; + LastLoadedElt = -1U; for (unsigned i = 0; i < NumElems; ++i) { - SDValue Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) { - if (!Base) + if (N->getMaskElt(i) < 0) { + if (!LDBase) return false; continue; } @@ -8054,18 +7940,20 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, if (!Elt.getNode() || (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) return false; - if (!Base) { - Base = Elt.getNode(); - if (Base->getOpcode() == ISD::UNDEF) + if (!LDBase) { + if (Elt.getNode()->getOpcode() == ISD::UNDEF) return false; + LDBase = cast(Elt.getNode()); + LastLoadedElt = i; continue; } if (Elt.getOpcode() == ISD::UNDEF) continue; - if (!TLI.isConsecutiveLoad(Elt.getNode(), Base, - EVT.getSizeInBits()/8, i, MFI)) + LoadSDNode *LD = cast(Elt); + if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) return false; + LastLoadedElt = i; } return true; } @@ -8077,106 +7965,39 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. 
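 /// (Editor's illustration, not from the patch: a v4i32 shuffle whose inputs
 /// are four consecutive, non-volatile i32 loads from p, p+4, p+8 and p+12
 /// with mask <0,1,2,3> becomes a single 16-byte load from p; if only the low
 /// two elements come from such loads it becomes an X86ISD::VZEXT_LOAD.)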
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - SDValue PermMask = N->getOperand(2); - unsigned NumElems = PermMask.getNumOperands(); - - // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 - // where the upper half is 0, it is advantageous to rewrite it as a build - // vector of (0, val) so it can use movq. - if (VT == MVT::v2i64) { - SDValue In[2]; - In[0] = N->getOperand(0); - In[1] = N->getOperand(1); - unsigned Idx0 =cast(PermMask.getOperand(0))->getZExtValue(); - unsigned Idx1 =cast(PermMask.getOperand(1))->getZExtValue(); - if (In[0].getValueType().getVectorNumElements() == NumElems && - In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && - In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { - ConstantSDNode* InsertVecIdx = - dyn_cast(In[Idx0/2].getOperand(2)); - if (InsertVecIdx && - InsertVecIdx->getZExtValue() == (Idx0 % 2) && - isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) { - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - In[Idx0/2].getOperand(1), - In[Idx1/2].getOperand(Idx1 % 2)); - } - } - } + ShuffleVectorSDNode *SVN = cast(N); + unsigned NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return SDValue(); // Try to combine a vector_shuffle into a 128-bit load. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - SDNode *Base = NULL; - if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base, - DAG, MFI, TLI)) + LoadSDNode *LD = NULL; + unsigned LastLoadedElt; + if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG, + MFI, TLI)) return SDValue(); - LoadSDNode *LD = cast(Base); - if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI)) + if (LastLoadedElt == NumElems - 1) { + if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI)) + return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile()); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); - return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); -} - -/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. -static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget, - const TargetLowering &TLI) { - unsigned NumOps = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); - - // Ignore single operand BUILD_VECTOR. - if (NumOps == 1) - return SDValue(); - - MVT VT = N->getValueType(0); - MVT EVT = VT.getVectorElementType(); - if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) - // We are looking for load i64 and zero extend. We want to transform - // it before legalizer has a chance to expand it. Also look for i64 - // BUILD_PAIR bit casted to f64. - return SDValue(); - // This must be an insertion into a zero vector. - SDValue HighElt = N->getOperand(1); - if (!isZeroNode(HighElt)) - return SDValue(); - - // Value must be a load. 
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
-      return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
-      return SDValue();
+                       LD->isVolatile(), LD->getAlignment());
+  } else if (NumElems == 4 && LastLoadedElt == 1) {
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
   }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
 }
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8557,9 +8378,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
       }
     }
   } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
-             isSplatMask(ShAmtOp.getOperand(2).getNode())) {
-      BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
-                              DAG.getIntPtrConstant(0));
+             cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+    BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+                            DAG.getIntPtrConstant(0));
   } else
     return SDValue();
 
@@ -8630,7 +8451,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   if (VT.getSizeInBits() != 64)
     return SDValue();
 
-  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+    && Subtarget->hasSSE2();
   if ((VT.isVector() ||
        (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
@@ -8772,14 +8596,77 @@ static SDValue PerformBTCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op = N->getOperand(0);
+  if (Op.getOpcode() == ISD::BIT_CONVERT)
+    Op = Op.getOperand(0);
+  MVT VT = N->getValueType(0), OpVT = Op.getValueType();
+  if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+      VT.getVectorElementType().getSizeInBits() ==
+      OpVT.getVectorElementType().getSizeInBits()) {
+    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+  }
+  return SDValue();
+}
+
+// On X86 and X86-64, atomic operations are lowered to locked instructions.
+// Locked instructions, in turn, have implicit fence semantics (all memory
+// operations are flushed before issuing the locked instruction, and they
+// are not buffered), so we can fold away the common pattern of
+// fence-atomic-fence.
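+//
+// For example (editor's sketch, not part of the original change), a DAG of
+// the form
+//   MEMBARRIER -> ATOMIC_LOAD_ADD -> MEMBARRIER
+// collapses to just the ATOMIC_LOAD_ADD, since the "lock add"/"lock xadd" it
+// selects to already orders the surrounding memory operations.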
+static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)); + default: + return SDValue(); + } +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); - case ISD::BUILD_VECTOR: - return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); @@ -8791,6 +8678,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FOR: return PerformFORCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); + case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); + case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); } return SDValue(); @@ -8872,6 +8761,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } return; + case 'K': + if (ConstantSDNode *C = dyn_cast(Op)) { + if ((int8_t)C->getSExtValue() == C->getSExtValue()) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; case 'N': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 255) { @@ -8917,40 +8814,44 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // If we are in non-pic codegen mode, we allow the address of a global (with // an optional displacement) to be used with 'i'. - GlobalAddressSDNode *GA = dyn_cast(Op); + GlobalAddressSDNode *GA = 0; int64_t Offset = 0; - // Match either (GA) or (GA+C) - if (GA) { - Offset = GA->getOffset(); - } else if (Op.getOpcode() == ISD::ADD) { - ConstantSDNode *C = dyn_cast(Op.getOperand(1)); - GA = dyn_cast(Op.getOperand(0)); - if (C && GA) { - Offset = GA->getOffset()+C->getZExtValue(); - } else { - C = dyn_cast(Op.getOperand(1)); - GA = dyn_cast(Op.getOperand(0)); - if (C && GA) - Offset = GA->getOffset()+C->getZExtValue(); - else - C = 0, GA = 0; + // Match either (GA), (GA+C), (GA+C1+C2), etc. 
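+    // (Editor's illustration: an "i" operand such as &arr[16] reaches us as
+    //  (add (GlobalAddress @arr) (Constant 64)) for i32 elements; each
+    //  constant is folded into Offset and the loop keeps peeling until the
+    //  GlobalAddressSDNode itself is found.)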
+ while (1) { + if ((GA = dyn_cast(Op))) { + Offset += GA->getOffset(); + break; + } else if (Op.getOpcode() == ISD::ADD) { + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + Offset += C->getZExtValue(); + Op = Op.getOperand(0); + continue; + } + } else if (Op.getOpcode() == ISD::SUB) { + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + Offset += -C->getZExtValue(); + Op = Op.getOperand(0); + continue; + } } - } - if (GA) { - if (hasMemory) - Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), - Offset, DAG); - else - Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - Offset); - Result = Op; - break; + // Otherwise, this isn't something we can handle, reject it. + return; } + // If we require an extra load to get this address, as in PIC mode, we + // can't accept it. + if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), + getTargetMachine(), false)) + return; - // Otherwise, not valid for this mode. - return; + if (hasMemory) + Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG); + else + Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + Offset); + Result = Op; + break; } } @@ -9086,7 +8987,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = Res.second = X86::GR8RegisterClass; + Res.second = X86::GR8RegisterClass; } } else if (VT == MVT::i32) { unsigned DestReg = 0; @@ -9103,7 +9004,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = Res.second = X86::GR32RegisterClass; + Res.second = X86::GR32RegisterClass; } } else if (VT == MVT::i64) { unsigned DestReg = 0; @@ -9120,7 +9021,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = Res.second = X86::GR64RegisterClass; + Res.second = X86::GR64RegisterClass; } } } else if (Res.second == X86::FR32RegisterClass ||