From: Evan Cheng Date: Wed, 29 Mar 2006 01:30:51 +0000 (+0000) Subject: - Only use pshufd for v4i32 vector shuffles. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=4f5633883b2e9ba187885e09e81564a53e549d60;p=oota-llvm.git - Only use pshufd for v4i32 vector shuffles. - Other shuffle related fixes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27244 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 303b808a8d6..8247167920c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1583,15 +1583,21 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { return Mask; } -/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as -/// values in ther permute mask if needed. Return an empty SDOperand is it is -/// already well formed. -static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2, - SDOperand Mask, MVT::ValueType VT, - SelectionDAG &DAG) { +/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as +/// values in the permute mask) if needed. Use V1 as the second vector if V2 is +/// undef. Return an empty SDOperand if it is already well formed. +static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2, + SDOperand Mask, MVT::ValueType VT, + SelectionDAG &DAG) { unsigned NumElems = Mask.getNumOperands(); SDOperand Half1 = Mask.getOperand(0); SDOperand Half2 = Mask.getOperand(NumElems/2); + bool V2Undef = false; + if (V2.getOpcode() == ISD::UNDEF) { + V2Undef = true; + V2 = V1; + } + if (cast(Half1)->getValue() >= NumElems && cast(Half2)->getValue() < NumElems) { // Swap the operands and change mask. 
@@ -1604,6 +1610,10 @@ static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2, DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); } + + if (V2Undef) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); + return SDOperand(); } @@ -2387,8 +2397,26 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); unsigned NumElems = PermMask.getNumOperands(); - if (NumElems == 2) - return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); + if (X86::isUNPCKLMask(PermMask.Val) || + X86::isUNPCKHMask(PermMask.Val)) + // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. + return SDOperand(); + + // PSHUFD's 2nd vector must be undef. + if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)) + if (V2.getOpcode() == ISD::UNDEF) + return SDOperand(); + else + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()), + PermMask); + + if (NumElems == 2 || + X86::isSplatMask(PermMask.Val) || + X86::isSHUFPMask(PermMask.Val)) { + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); + } +#if 0 else if (X86::isSplatMask(PermMask.Val)) { // Handle splat cases. if (V2.getOpcode() == ISD::UNDEF) @@ -2400,10 +2428,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); - } else if (X86::isUNPCKLMask(PermMask.Val) || - X86::isUNPCKHMask(PermMask.Val)) { - // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. - return SDOperand(); } else if (X86::isPSHUFDMask(PermMask.Val)) { if (V2.getOpcode() == ISD::UNDEF) // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD. 
@@ -2414,7 +2438,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); } else if (X86::isSHUFPMask(PermMask.Val)) - return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); +#endif assert(0 && "Unexpected VECTOR_SHUFFLE to lower"); abort(); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 82d4c1f874f..18d889f69fc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -79,9 +79,8 @@ def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isUNPCKHMask(N); }]>; -// Only use PSHUF if it is not a splat. def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ - return !X86::isSplatMask(N) && X86::isPSHUFDMask(N); + return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>; def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ @@ -918,86 +917,92 @@ def PSHUFWrm : PSIi8<0x70, MRMSrcMem, "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; def PSHUFDrr : PDIi8<0x70, MRMDestReg, (ops VR128:$dst, VR128:$src1, i8imm:$src2), - "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; + "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (vector_shuffle + VR128:$src1, (undef), + PSHUFD_shuffle_mask:$src2)))]>; def PSHUFDrm : PDIi8<0x70, MRMSrcMem, (ops VR128:$dst, i128mem:$src1, i8imm:$src2), - "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; + "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (vector_shuffle + (load addr:$src1), (undef), + PSHUFD_shuffle_mask:$src2)))]>; let isTwoAddress = 1 in { def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3), "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v4f32 VR128:$src1), (v4f32 VR128:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, 
VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3), "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v4f32 VR128:$src1), (load addr:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v2f64 VR128:$src1), (v2f64 VR128:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v2f64 VR128:$src1), (load addr:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpckhps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpckhps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpckhpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - 
UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpckhpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; } //===----------------------------------------------------------------------===// @@ -1354,11 +1359,3 @@ def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm), (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, 
Requires<[HasSSE2]>; def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm), (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; - -// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not. -def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm), - (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>, - Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm), - (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>, - Requires<[HasSSE2]>;