// [ 5, 6, 7, zz, zz, zz, zz, zz]
// [ -1, 5, 6, 7, zz, zz, zz, zz]
// [ 1, 2, -1, -1, -1, -1, zz, zz]
- auto MatchByteShift = [&](int Shift) -> SDValue {
- bool MatchLeft = true, MatchRight = true;
- for (int l = 0; l < NumElts; l += NumLaneElts) {
+
+ auto CheckZeros = [&](int Shift, bool LeftShift) {
+ for (int l = 0; l < NumElts; l += NumLaneElts)
for (int i = 0; i < Shift; ++i)
- MatchLeft &= Zeroable[l + i];
- for (int i = NumLaneElts - Shift; i < NumLaneElts; ++i)
- MatchRight &= Zeroable[l + i];
- }
- if (!(MatchLeft || MatchRight))
- return SDValue();
+ if (!Zeroable[l + i + (LeftShift ? 0 : (NumLaneElts - Shift))])
+ return false;
- bool MatchV1 = true, MatchV2 = true;
+ return true;
+ };
+
+ auto MatchByteShift = [&](int Shift, bool LeftShift, SDValue V) {
for (int l = 0; l < NumElts; l += NumLaneElts) {
- unsigned Pos = MatchLeft ? Shift + l : l;
- unsigned Low = MatchLeft ? l : Shift + l;
+ unsigned Pos = LeftShift ? Shift + l : l;
+ unsigned Low = LeftShift ? l : Shift + l;
unsigned Len = NumLaneElts - Shift;
- MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
- MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + NumElts);
+ if (!isSequentialOrUndefInRange(Mask, Pos, Len,
+ Low + (V == V1 ? 0 : NumElts)))
+ return SDValue();
}
- if (!(MatchV1 || MatchV2))
- return SDValue();
int ByteShift = Shift * Scale;
- unsigned Op = MatchRight ? X86ISD::VSRLDQ : X86ISD::VSHLDQ;
- SDValue V = MatchV1 ? V1 : V2;
+ unsigned Op = LeftShift ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
- V = DAG.getNode(Op, DL, ShiftVT, V,
- DAG.getConstant(ByteShift, MVT::i8));
+ V = DAG.getNode(Op, DL, ShiftVT, V, DAG.getConstant(ByteShift, MVT::i8));
return DAG.getNode(ISD::BITCAST, DL, VT, V);
};
for (int Shift = 1; Shift < NumLaneElts; ++Shift)
- if (SDValue S = MatchByteShift(Shift))
- return S;
+ for (bool LeftShift : {true, false})
+ if (CheckZeros(Shift, LeftShift))
+ for (SDValue V : {V1, V2})
+ if (SDValue S = MatchByteShift(Shift, LeftShift, V))
+ return S;
// no match
return SDValue();
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
ret <8 x i16> %shuffle
}
+
+define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
+ ret <8 x i16> %shuffle
+}