return V;
}
+ // General extends failed, but 128-bit vectors may be able to use MOVQ.
+ if (Bits != 128)
+ return SDValue();
+
+ // Returns one of the source operands if the shuffle can be reduced to a
+ // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.
+ auto CanZExtLowHalf = [&]() {
+ for (int i = NumElements / 2; i != NumElements; i++)
+ if (!Zeroable[i])
+ return SDValue();
+ if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
+ return V1;
+ if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
+ return V2;
+ return SDValue();
+ };
+
+ if (SDValue V = CanZExtLowHalf()) {
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ }
+
// No viable ext lowering found.
return SDValue();
}
; shuffle instruction when the shuffle indexes are not compatible.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test17:
-; CHECK: # BB#0:
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; CHECK-NEXT: orps %xmm1, %xmm2
-; CHECK-NEXT: movaps %xmm2, %xmm0
-; CHECK-NEXT: retq
- %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
- %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
+; CHECK-LABEL: test17:\r
+; CHECK: # BB#0:\r
+; CHECK-NEXT: xorps %xmm2, %xmm2\r
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]\r
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]\r
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero\r
+; CHECK-NEXT: orps %xmm2, %xmm0\r
+; CHECK-NEXT: retq\r
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>\r
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>\r
%or = or <4 x i32> %shuf1, %shuf2
ret <4 x i32> %or
}
; SSE2: # BB#0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR20540:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR20540:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: PR20540:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %shuffle
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i16> %shuffle
}
+
+define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_01u3zzuz:
+; SSE: # BB#0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_01u3zzuz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
+ ret <8 x i16> %shuffle
+}