Mask, Subtarget, DAG))
return V;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
+ if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+
if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE: # BB#0:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %shuffle
define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
; SSE: # BB#0:
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <8 x i16> %shuffle
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq