From 646722d55f8016e6c8edbff55dfc02157114d658 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 3 Feb 2015 20:09:18 +0000 Subject: [PATCH] [X86][SSE] Added general integer shuffle matching for MOVQ instruction This patch adds general shuffle pattern matching for the MOVQ zero-extend instruction (copy lower 64 bits, zero upper) for all 128-bit integer vectors. It is added as a fallback test in lowerVectorShuffleAsZeroOrAnyExtend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228022 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 23 ++++++++++++++++++++++ test/CodeGen/X86/combine-or.ll | 21 ++++++++++---------- test/CodeGen/X86/vector-shuffle-128-v16.ll | 16 +++++++-------- test/CodeGen/X86/vector-shuffle-128-v4.ll | 14 +++++++++++++ test/CodeGen/X86/vector-shuffle-128-v8.ll | 14 +++++++++++++ 5 files changed, 68 insertions(+), 20 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d35e35e8aa2..d58715ab7f6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7962,6 +7962,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( return V; } + // General extends failed, but 128-bit vectors may be able to use MOVQ. + if (Bits != 128) + return SDValue(); + + // Returns one of the source operands if the shuffle can be reduced to a + // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits. 
+ auto CanZExtLowHalf = [&]() { + for (int i = NumElements / 2; i != NumElements; i++) + if (!Zeroable[i]) + return SDValue(); + if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0)) + return V1; + if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements)) + return V2; + return SDValue(); + }; + + if (SDValue V = CanZExtLowHalf()) { + V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); + return DAG.getNode(ISD::BITCAST, DL, VT, V); + } + // No viable ext lowering found. return SDValue(); } diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll index 280fcbc7a3a..0eb72fa1603 100644 --- a/test/CodeGen/X86/combine-or.ll +++ b/test/CodeGen/X86/combine-or.ll @@ -204,17 +204,16 @@ define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) { ; shuffle instruction when the shuffle indexes are not compatible. define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test17: -; CHECK: # BB#0: -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0] -; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2] -; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3] -; CHECK-NEXT: orps %xmm1, %xmm2 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: retq - %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> - %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> +; CHECK-LABEL: test17: +; CHECK: # BB#0: +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2] +; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3] +; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero +; CHECK-NEXT: orps %xmm2, %xmm0 +; CHECK-NEXT: retq + %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> + %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> %or = or <4 x i32> %shuf1, %shuf2 ret <4 x i32> %or } diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll 
b/test/CodeGen/X86/vector-shuffle-128-v16.ll index eec703f3670..933b0111efc 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -460,27 +460,25 @@ define <16 x i8> @PR20540(<8 x i8> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] -; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR20540: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSSE3-NEXT: retq ; ; SSE41-LABEL: PR20540: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: PR20540: ; AVX: # BB#0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: retq %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index dadbef12434..882bac414b7 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ 
b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1076,6 +1076,20 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { ret <4 x i32> %shuffle } +define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) { +; SSE-LABEL: shuffle_v4i32_01zu: +; SSE: # BB#0: +; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_01zu: +; AVX: # BB#0: +; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX-NEXT: retq + %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> + ret <4 x i32> %shuffle +} + define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) { ; SSE-LABEL: insert_reg_and_zero_v4i32: ; SSE: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index 9d09c31b9f8..4eb4d626aa4 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1917,3 +1917,17 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle } + +define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) { +; SSE-LABEL: shuffle_v8i16_01u3zzuz: +; SSE: # BB#0: +; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_01u3zzuz: +; AVX: # BB#0: +; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> + ret <8 x i16> %shuffle +} -- 2.34.1