From: Chandler Carruth Date: Thu, 18 Sep 2014 04:11:29 +0000 (+0000) Subject: [x86] Initial step of teaching the new vector shuffle lowering about X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3ff76847baebdc6f7256a5bdb0056191ce00f4a0;p=oota-llvm.git [x86] Initial step of teaching the new vector shuffle lowering about PALIGNR. This just adds it to the v8i16 and v16i8 lowering steps where it is completely unmatched. It also introduces the logic for detecting rotation shuffle masks even in the presence of single input or blend masks and arbitrarily undef lanes. I've added fairly comprehensive tests for the matching logic in v8i16 because the tests at that size are much easier to write and manage. I've not checked the SSE2 code generated for these tests because the code is *horrible*. It is absolute madness. Testing it will just make the test brittle without giving any interesting improvements in the correctness confidence. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218013 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c6b9ee0370d..8721d8ffe50 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7256,6 +7256,103 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, DAG.getConstant(BlendMask, MVT::i8))); } +/// \brief Try to lower a vector shuffle as a byte rotation. +/// +/// We have a generic PALIGNR instruction in x86 that will do an arbitrary +/// byte-rotation of a the concatentation of two vectors. This routine will +/// try to generically lower a vector shuffle through such an instruction. It +/// does not check for the availability of PALIGNR-based lowerings, only the +/// applicability of this strategy to the given mask. This matches shuffle +/// vectors that look like: +/// +/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2] +/// +/// Essentially it concatenates V1 and V2, shifts right by some number of +/// elements, and takes the low elements as the result. Note that while this is +/// specified as a *right shift* because x86 is little-endian, it is a *left +/// rotate* of the vector lanes. +/// +/// Note that this only handles 128-bit vector widths currently. +static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, + ArrayRef Mask, + SelectionDAG &DAG) { + assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); + + // We need to detect various ways of spelling a rotation: + // [11, 12, 13, 14, 15, 0, 1, 2] + // [-1, 12, 13, 14, -1, -1, 1, -1] + // [-1, -1, -1, -1, -1, -1, 1, 2] + // [ 3, 4, 5, 6, 7, 8, 9, 10] + // [-1, 4, 5, 6, -1, -1, 9, -1] + // [-1, 4, 5, 6, -1, -1, -1, -1] + int Rotation = 0; + SDValue Lo, Hi; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Mask[i] == -1) + continue; + assert(Mask[i] >= 0 && "Only -1 is a valid negative mask element!"); + + // Based on the mod-Size value of this mask element determine where + // a rotated vector would have started. + int StartIdx = i - (Mask[i] % Size); + if (StartIdx == 0) + // The identity rotation isn't interesting, stop. + return SDValue(); + + // If we found the tail of a vector the rotation must be the missing + // front. If we found the head of a vector, it must be how much of the head. + int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; + + if (Rotation == 0) + Rotation = CandidateRotation; + else if (Rotation != CandidateRotation) + // The rotations don't match, so we can't match this mask. + return SDValue(); + + // Compute which value this mask is pointing at. + SDValue MaskV = Mask[i] < Size ? V1 : V2; + + // Compute which of the two target values this index should be assigned to. + // This reflects whether the high elements are remaining or the low elements + // are remaining. + SDValue &TargetV = StartIdx < 0 ? Hi : Lo; + + // Either set up this value if we've not encountered it before, or check + // that it remains consistent. + if (!TargetV) + TargetV = MaskV; + else if (TargetV != MaskV) + // This may be a rotation, but it pulls from the inputs in some + // unsupported interleaving. + return SDValue(); + } + + // Check that we successfully analyzed the mask, and normalize the results. + assert(Rotation != 0 && "Failed to locate a viable rotation!"); + assert((Lo || Hi) && "Failed to find a rotated input vector!"); + if (!Lo) + Lo = Hi; + else if (!Hi) + Hi = Lo; + + // Cast the inputs to v16i8 to match PALIGNR. + Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo); + Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi); + + assert(VT.getSizeInBits() == 128 && + "Rotate-based lowering only supports 128-bit lowering!"); + assert(Mask.size() <= 16 && + "Can shuffle at most 16 bytes in a 128-bit vector!"); + // The actual rotate instruction rotates bytes, so we need to scale the + // rotation based on how many bytes are in the vector. + int Scale = 16 / Mask.size(); + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo, + DAG.getConstant(Rotation * Scale, MVT::i8))); +} + /// \brief Handle lowering of 2-lane 64-bit floating point shuffles. /// /// This is the basis function for the 2-lane 64-bit shuffles as we have full @@ -7692,6 +7789,12 @@ static SDValue lowerV8I16SingleInputVectorShuffle( if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V); + // Try to use rotation instructions if available. + if (Subtarget->hasSSSE3()) + if (SDValue Rotate = lowerVectorShuffleAsByteRotate( + DL, MVT::v8i16, V, V, Mask, DAG)) + return Rotate; + // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all // such inputs we can swap two of the dwords across the half mark and end up // with <=2 inputs to each half in each half. Once there, we can fall through @@ -8298,6 +8401,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Blend; + // Try to use rotation instructions if available. + if (Subtarget->hasSSSE3()) + if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask, DAG)) + return Rotate; + if (NumV1Inputs + NumV2Inputs <= 4) return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG); @@ -8426,6 +8534,13 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef OrigMask = SVOp->getMask(); assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!"); + + // Try to use rotation instructions if available. + if (Subtarget->hasSSSE3()) + if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, + OrigMask, DAG)) + return Rotate; + int MaskStorage[16] = { OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3], OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7], diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 38734eda941..58786db4031 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -372,3 +372,59 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz( %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle } + +define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $1, {{.*}} # xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) { +; SSSE3-LABEL: @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $15, {{.*}} # xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index f959fea472f..92a2282f0ed 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -845,3 +845,133 @@ define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle } + +define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_def01234 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_ueuu123u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_56701234 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_u6uu123u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_uuuu123u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_bcdef012 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_ucdeuu1u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_34567012 +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_u456uu1u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_u456uuuu +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_3456789a +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_u456uu9u +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_56789abc +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { +; SSSE3-LABEL: @shuffle_v8i16_u6uu9abu +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +}