From: Evan Cheng
Date: Fri, 21 Apr 2006 01:05:10 +0000 (+0000)
Subject: Now generating perfect (I think) code for "vector set" with a single non-zero
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=017dcc6e556f3f89dd3e3881696084af694718ac;p=oota-llvm.git

Now generating perfect (I think) code for "vector set" with a single non-zero
scalar value, e.g.:
        _mm_set_epi32(0, a, 0, 0);
==>
        movd 4(%esp), %xmm0
        pshufd $69, %xmm0, %xmm0
        _mm_set_epi8(0, 0, 0, 0, 0, a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
==>
        movzbw 4(%esp), %ax
        movzwl %ax, %eax
        pxor %xmm0, %xmm0
        pinsrw $5, %eax, %xmm0
(Compilable sketches of these two examples appear after the diff.)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27923 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 877854e51a9..497338f1642 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1687,11 +1687,12 @@ bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
   return true;
 }
 
-/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
-static bool isMOVSMask(std::vector<SDOperand> &N) {
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(std::vector<SDOperand> &N) {
   unsigned NumElems = N.size();
-  if (NumElems != 2 && NumElems != 4)
+  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
 
   if (!isUndefOrEqual(N[0], NumElems))
@@ -1706,18 +1707,18 @@ static bool isMOVSMask(std::vector<SDOperand> &N) {
   return true;
 }
 
-bool X86::isMOVSMask(SDNode *N) {
+bool X86::isMOVLMask(SDNode *N) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 
   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
-  return ::isMOVSMask(Ops);
+  return ::isMOVLMask(Ops);
 }
 
-/// isCommutedMOVS - Returns true if the shuffle mask is except the reverse
-/// of what x86 movs want. X86 movs requires the lowest element to be lowest
+/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
+/// x86 MOVL wants: MOVL requires the lowest element to be the lowest
 /// element of vector 2 and the other elements to come from vector 1 in order.
-static bool isCommutedMOVS(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
+static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
   unsigned NumElems = Ops.size();
-  if (NumElems != 2 && NumElems != 4)
+  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
 
   if (!isUndefOrEqual(Ops[0], 0))
@@ -1737,10 +1738,10 @@ static bool isCommutedMOVS(std::vector<SDOperand> &Ops, bool V2IsSplat = false)
   return true;
 }
 
-static bool isCommutedMOVS(SDNode *N, bool V2IsSplat = false) {
+static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 
   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
-  return isCommutedMOVS(Ops);
+  return isCommutedMOVL(Ops, V2IsSplat);
 }
 
 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
@@ -2055,9 +2056,9 @@ static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
   return Mask;
 }
 
-/// getMOVSMask - Returns a vector_shuffle mask for an movs{s|d} operation
-/// of specified width.
-static SDOperand getMOVSMask(unsigned NumElems, SelectionDAG &DAG) {
+/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
+/// operation of specified width.
+static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
   MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
 
@@ -2095,30 +2096,63 @@ static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
 }
 
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+  assert(MVT::isVector(VT) && "Expected a vector type");
+  unsigned NumElems = getVectorNumElements(VT);
+  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+  bool isFP = MVT::isFloatingPoint(EVT);
+  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
+  std::vector<SDOperand> ZeroVec(NumElems, Zero);
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
+}
+
 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
 ///
 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
   SDOperand V1 = Op.getOperand(0);
-  SDOperand PermMask = Op.getOperand(2);
+  SDOperand Mask = Op.getOperand(2);
   MVT::ValueType VT = Op.getValueType();
-  unsigned NumElems = PermMask.getNumOperands();
-  PermMask = getUnpacklMask(NumElems, DAG);
+  unsigned NumElems = Mask.getNumOperands();
+  Mask = getUnpacklMask(NumElems, DAG);
   while (NumElems != 4) {
-    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, PermMask);
+    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
     NumElems >>= 1;
   }
   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
 
   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
-  SDOperand Zero = DAG.getConstant(0, MVT::getVectorBaseType(MaskVT));
-  std::vector<SDOperand> ZeroVec(4, Zero);
-  SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, ZeroVec);
+  Mask = getZeroVector(MaskVT, DAG);
   SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
-                                  DAG.getNode(ISD::UNDEF, MVT::v4i32),
-                                  SplatMask);
+                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
 }
 
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDOperand Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+}
+
+/// getShuffleVectorAgainstZero - Return a vector_shuffle of a zero vector and
+/// the specified vector.
+static SDOperand getShuffleVectorAgainstZero(SDOperand Vec, MVT::ValueType VT,
+                                             unsigned NumElems, unsigned Idx,
+                                             SelectionDAG &DAG) {
+  SDOperand ZeroV = getZeroVector(VT, DAG);
+  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
+  SDOperand Zero = DAG.getConstant(0, EVT);
+  std::vector<SDOperand> MaskVec(NumElems, Zero);
+  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
+  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, ZeroV, Vec, Mask);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -2924,7 +2958,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     SDOperand PermMask = Op.getOperand(2);
     MVT::ValueType VT = Op.getValueType();
     unsigned NumElems = PermMask.getNumOperands();
-    bool V2IsSplat = isSplatVector(V2.Val);
 
     if (isSplatMask(PermMask.Val)) {
       if (NumElems <= 4) return Op;
@@ -2932,7 +2965,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return PromoteSplat(Op, DAG);
     }
 
-    if (X86::isMOVSMask(PermMask.Val) ||
+    if (X86::isMOVLMask(PermMask.Val) ||
         X86::isMOVSHDUPMask(PermMask.Val) ||
         X86::isMOVSLDUPMask(PermMask.Val) ||
         X86::isMOVHLPSMask(PermMask.Val) ||
@@ -2944,15 +2977,30 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
         ShouldXformToMOVLP(V1.Val, PermMask.Val))
       return CommuteVectorShuffle(Op, DAG);
 
-    if (isCommutedMOVS(PermMask.Val, V2IsSplat)) {
+    bool V1IsSplat = isSplatVector(V1.Val);
+    bool V2IsSplat = isSplatVector(V2.Val);
+    if (V1IsSplat && !V2IsSplat) {
+      Op = CommuteVectorShuffle(Op, DAG);
+      V1 = Op.getOperand(0);
+      V2 = Op.getOperand(1);
+      PermMask = Op.getOperand(2);
+      V2IsSplat = true;
+    }
+
+    if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
+      Op = CommuteVectorShuffle(Op, DAG);
+      V1 = Op.getOperand(0);
+      V2 = Op.getOperand(1);
+      PermMask = Op.getOperand(2);
       if (V2IsSplat) {
         // V2 is a splat, so the mask may be malformed. That is, it may point
         // to any V2 element. The instruction selector won't like this. Get
         // a corrected mask and commute to form a proper MOVS{S|D}.
-        SDOperand NewMask = getMOVSMask(NumElems, DAG);
-        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
+        SDOperand NewMask = getMOVLMask(NumElems, DAG);
+        if (NewMask.Val != PermMask.Val)
+          Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
       }
-      return CommuteVectorShuffle(Op, DAG);
+      return Op;
     }
 
     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
@@ -3088,48 +3136,60 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     if (ISD::isBuildVectorAllOnes(Op.Val))
       return Op;
 
-    std::set<SDOperand> Values;
-    SDOperand Elt0 = Op.getOperand(0);
-    Values.insert(Elt0);
-    bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
-                       cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
-      (isa<ConstantFPSDNode>(Elt0) &&
-       cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
-    bool RestAreZero = true;
     unsigned NumElems = Op.getNumOperands();
-    for (unsigned i = 1; i < NumElems; ++i) {
-      SDOperand Elt = Op.getOperand(i);
-      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
-        if (!FPC->isExactlyValue(+0.0))
-          RestAreZero = false;
-      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
-        if (!C->isNullValue())
-          RestAreZero = false;
-      } else
-        RestAreZero = false;
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+    std::vector<unsigned> NonZeros;
+    std::set<SDOperand> Values;
+    for (unsigned i = 0; i < NumElems; ++i) {
+      unsigned Idx = NumElems - i - 1;
+      SDOperand Elt = Op.getOperand(Idx);
       Values.insert(Elt);
+      if (!isZeroNode(Elt))
+        NonZeros.push_back(Idx);
     }
 
-    if (RestAreZero) {
-      if (Elt0IsZero) return Op;
-
-      // Zero extend a scalar to a vector.
-      if (Elt0.getValueType() != MVT::i64)
-        return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
+    if (NonZeros.size() == 0)
+      return Op;
 
-      // See if we can turn it into a f64 op.
-      bool IsLegal = false;
-      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt0)) {
-        Elt0 = DAG.getConstantFP(BitsToDouble(C->getValue()), MVT::f64);
-        IsLegal = true;
-      } else if (Elt0.getOpcode() == ISD::LOAD) {
-        Elt0 = DAG.getLoad(MVT::f64, Elt0.getOperand(0), Elt0.getOperand(1),
-                           Elt0.getOperand(2));
-        IsLegal = true;
+    if (NonZeros.size() == 1) {
+      unsigned Idx = NonZeros[0];
+      SDOperand Item = Op.getOperand(Idx);
+      if (Idx == 0 || MVT::getSizeInBits(EVT) >= 32)
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
+      if (Idx == 0)
+        return getShuffleVectorAgainstZero(Item, VT, NumElems, Idx, DAG);
+
+      // If the element VT is smaller than 32 bits, convert it to an insert
+      // into a zero vector.
+      if (MVT::getSizeInBits(EVT) <= 16) {
+        SDOperand ZeroV;
+        if (EVT == MVT::i8) {
+          Item = DAG.getNode(ISD::ANY_EXTEND, MVT::i16, Item);
+          if ((Idx % 2) != 0)
+            Item = DAG.getNode(ISD::SHL, MVT::i16,
+                               Item, DAG.getConstant(8, MVT::i8));
+          Idx /= 2;
+          ZeroV = getZeroVector(MVT::v8i16, DAG);
+          return DAG.getNode(ISD::BIT_CONVERT, VT,
+                             DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16,
+                                         ZeroV, Item,
+                                         DAG.getConstant(Idx, MVT::i32)));
+        } else {
+          ZeroV = getZeroVector(VT, DAG);
+          return DAG.getNode(ISD::INSERT_VECTOR_ELT, VT, ZeroV, Item,
+                             DAG.getConstant(Idx, MVT::i32));
+        }
       }
-      if (IsLegal)
-        return DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
-                           DAG.getNode(X86ISD::ZEXT_S2VEC, MVT::v2f64, Elt0));
+
+      // Turn it into a shuffle of zero and zero-extended scalar to vector.
+      Item = getShuffleVectorAgainstZero(Item, VT, NumElems, 0, DAG);
+      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
+      std::vector<SDOperand> MaskVec;
+      for (unsigned i = 0; i < NumElems; i++)
+        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
+      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
+                         DAG.getNode(ISD::UNDEF, VT), Mask);
     }
 
     if (Values.size() > 2) {
@@ -3138,7 +3198,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
       //       : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
       // Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
-      MVT::ValueType VT = Op.getValueType();
       SDOperand PermMask = getUnpacklMask(NumElems, DAG);
       std::vector<SDOperand> V(NumElems);
       for (unsigned i = 0; i < NumElems; ++i)
@@ -3406,7 +3465,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
-  case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
   }
@@ -3514,7 +3572,7 @@ bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
   if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
   if (NumElts == 2) return true;
   if (NumElts == 4) {
-    return (isMOVSMask(BVOps) || isCommutedMOVS(BVOps, true) ||
+    return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
             isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
   }
   return false;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3569c35d590..883bf7e131c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -150,10 +150,6 @@ namespace llvm {
       /// have to match the operand type.
       S2VEC,
 
-      /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination
-      /// base does not have to match the operand type.
-      ZEXT_S2VEC,
-
       /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRW.
       PEXTRW,
@@ -230,9 +226,10 @@ namespace llvm {
     /// <0, 0, 1, 1>
     bool isUNPCKL_v_undef_Mask(SDNode *N);
 
-    /// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
-    bool isMOVSMask(SDNode *N);
+    /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to MOVSS,
+    /// MOVSD, and MOVD, i.e. setting the lowest element.
+    bool isMOVLMask(SDNode *N);
 
     /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 73dd49c1ab1..d874fd9ea9e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -29,8 +29,6 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest, [SDNPOutFlag]>;
 def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
-def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
-                          SDTypeProfile<1, 1, []>, []>;
 def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
 def X86pinsrw : SDNode<"X86ISD::PINSRW",
@@ -104,8 +102,8 @@ def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isMOVLPMask(N);
 }]>;
 
-def MOVS_shuffle_mask : PatLeaf<(build_vector), [{
-  return X86::isMOVSMask(N);
+def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isMOVLMask(N);
 }]>;
 
 def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
@@ -2194,20 +2192,18 @@ def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2)
                       "movss {$src2, $dst|$dst, $src2}", []>;
 def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                       "movsd {$src2, $dst|$dst, $src2}", []>;
-def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
-                       "movd {$src2, $dst|$dst, $src2}", []>;
 
 let AddedComplexity = 20 in {
 def MOVLPSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movss {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
-                             MOVS_shuffle_mask)))]>;
+                             MOVL_shuffle_mask)))]>;
 def MOVLPDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                    "movsd {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
-                             MOVS_shuffle_mask)))]>;
+                             MOVL_shuffle_mask)))]>;
 }
 }
@@ -2223,23 +2219,36 @@ def MOVLQ128rr : PDI<0xD6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
 
 // Move to the lower bits of a VR128, zeroing the upper bits.
 // Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in {
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                       "movss {$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                        (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
+                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+                                (v4f32 (scalar_to_vector (loadf32 addr:$src))),
+                                MOVL_shuffle_mask)))]>;
 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                       "movsd {$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                        (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
+                      [(set VR128:$dst, (v2f64 (vector_shuffle immAllZerosV,
+                                (v2f64 (scalar_to_vector (loadf64 addr:$src))),
+                                MOVL_shuffle_mask)))]>;
+// movd / movq to an XMM register zero-extends.
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                       "movd {$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (v4i32 (vector_shuffle immAllZerosV,
+                                 (v4i32 (scalar_to_vector R32:$src)),
+                                 MOVL_shuffle_mask)))]>;
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                        "movd {$src, $dst|$dst, $src}",
-                       [(set VR128:$dst,
-                         (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+                       [(set VR128:$dst, (v4i32 (vector_shuffle immAllZerosV,
+                                 (v4i32 (scalar_to_vector (loadi32 addr:$src))),
+                                 MOVL_shuffle_mask)))]>;
+def MOVZQI2PQIrr : PDI<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+                       "movq {$src, $dst|$dst, $src}", []>;
 def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                        "movq {$src, $dst|$dst, $src}",
-                       [(set VR128:$dst,
-                         (bc_v2i64 (v2f64 (X86zexts2vec
-                                           (loadf64 addr:$src)))))]>;
+                       [(set VR128:$dst, (bc_v2i64 (vector_shuffle immAllZerosV,
+                                 (v2f64 (scalar_to_vector (loadf64 addr:$src))),
+                                 MOVL_shuffle_mask)))]>;
+}
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -2341,17 +2350,23 @@ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>,
 def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>,
       Requires<[HasSSE2]>;
 
-// Zeroing a VR128 then do a MOVS* to the lower bits.
-def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
+// Move a scalar to an XMM register, zero-extended.
+// movd to an XMM register zero-extends.
+let AddedComplexity = 20 in {
+def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+                  (v8i16 (X86s2vec R32:$src)), MOVL_shuffle_mask)),
+          (MOVZDI2PDIrr R32:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+                  (v16i8 (X86s2vec R32:$src)), MOVL_shuffle_mask)),
+          (MOVZDI2PDIrr R32:$src)>, Requires<[HasSSE2]>;
+// Zero a VR128, then do a MOVS{S|D} to the lower bits.
+def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+                  (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
           (MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
+def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+                  (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
          (MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (X86zexts2vec R32:$src)),
-          (MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (X86zexts2vec R16:$src)),
-          (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86zexts2vec R8:$src)),
-          (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
+}
 
 // Splat v2f64 / v2i64
 let AddedComplexity = 10 in {
@@ -2448,13 +2463,13 @@ def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2),
           (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
 
 def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
-                  MOVS_shuffle_mask)),
+                  MOVL_shuffle_mask)),
           (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
 def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)),
                   MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
-                  MOVS_shuffle_mask)),
+                  MOVL_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (vector_shuffle VR128:$src1, (loadv2i64 addr:$src2),
                   MOVHP_shuffle_mask)),
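
A minimal, compilable sketch of the two commit-message examples, assuming an SSE2-capable compiler and the standard <emmintrin.h> intrinsics header; the file and function names below are illustrative, not part of the commit:

// vecset.cpp - reproduce the "single non-zero scalar" vector-set cases.
// Build with, e.g.:  c++ -O2 -msse2 -S vecset.cpp
// then look for the movd/pshufd and pxor/pinsrw sequences in vecset.s.
#include <emmintrin.h>

// Only element 2 (of 0..3) is non-zero; _mm_set_epi32 lists elements
// from the highest (3) down to the lowest (0).
__m128i set_one_i32(int a) {
  return _mm_set_epi32(0, a, 0, 0);
}

// Only byte 10 (of 0..15) is non-zero, matching the pinsrw $5 example.
__m128i set_one_i8(char a) {
  return _mm_set_epi8(0, 0, 0, 0, 0, a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}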
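
To make the renamed predicate concrete, here is a self-contained model of isMOVLMask with plain integers standing in for the BUILD_VECTOR mask operands (a simplification: the real code's isUndefOrEqual also accepts undef mask elements as wildcards):

#include <cassert>
#include <vector>

// Shuffle-mask convention: mask[i] < N picks element i of V1,
// mask[i] >= N picks element (mask[i] - N) of V2.
static bool isMOVLMaskModel(const std::vector<unsigned> &M) {
  unsigned N = M.size();
  if (N != 2 && N != 4 && N != 8 && N != 16)
    return false;
  if (M[0] != N)              // lowest element must be V2's element 0
    return false;
  for (unsigned i = 1; i < N; ++i)
    if (M[i] != i)            // the rest come from V1, in order
      return false;
  return true;
}

int main() {
  assert(isMOVLMaskModel({4, 1, 2, 3}));   // exactly what MOVSS produces
  assert(!isMOVLMaskModel({0, 1, 2, 3})); // identity shuffle is not MOVL
  return 0;
}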
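
The _mm_set_epi8 case goes through the new i8 branch of the BUILD_VECTOR lowering above: SSE2 has pinsrw but no byte-granularity insert, so the byte is any-extended to i16, shifted into the high half when its byte index is odd, and inserted at word index Idx/2 of a zeroed v8i16. A small model of just that index/value arithmetic (struct and function names are illustrative):

#include <cassert>
#include <cstdint>

struct WordInsert { uint16_t Value; unsigned WordIdx; };

// Mirrors the lowering: ANY_EXTEND i8 -> i16, SHL by 8 for an odd byte
// index (little-endian: the odd byte is the high half of its word),
// then INSERT_VECTOR_ELT at word index Idx / 2.
static WordInsert lowerSingleByte(uint8_t Byte, unsigned Idx) {
  uint16_t V = Byte;
  if (Idx % 2 != 0)
    V = static_cast<uint16_t>(V << 8);
  return WordInsert{V, Idx / 2};
}

int main() {
  // Byte index 10 (the commit's _mm_set_epi8 example): even index, so the
  // byte stays in the low half of word 5 -- hence "pinsrw $5, %eax, %xmm0".
  WordInsert Lo = lowerSingleByte(0xAB, 10);
  assert(Lo.WordIdx == 5 && Lo.Value == 0x00AB);
  // An odd byte index lands in the high half of the same word.
  WordInsert Hi = lowerSingleByte(0xAB, 11);
  assert(Hi.WordIdx == 5 && Hi.Value == 0xAB00);
  return 0;
}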