[X86][SSE] Added general integer shuffle matching for MOVQ instruction

author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 3 Feb 2015 20:09:18 +0000 (20:09 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 3 Feb 2015 20:09:18 +0000 (20:09 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index d35e35e8aa294289eea259eaf2879eaaa430774d..d58715ab7f66700cb4b174f616c4d000e237584e 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7962,6 +7962,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
        return V;
    }
  
+  // General extends failed, but 128-bit vectors may be able to use MOVQ.
+  if (Bits != 128)
+    return SDValue();
+
+  // Returns one of the source operands if the shuffle can be reduced to a
+  // MOVQ, which copies the lower 64 bits and zeroes the upper 64 bits.
+  auto CanZExtLowHalf = [&]() {
+    for (int i = NumElements / 2; i != NumElements; i++)
+      if (!Zeroable[i])
+        return SDValue();
+    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
+      return V1;
+    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
+      return V2;
+    return SDValue();
+  };
+
+  if (SDValue V = CanZExtLowHalf()) {
+    V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V);
+    V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
+    return DAG.getNode(ISD::BITCAST, DL, VT, V);
+  }
+
    // No viable ext lowering found.
    return SDValue();
  }
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll

index 280fcbc7a3a7fd126e731c1c0ada8351e77b98c1..0eb72fa16036cd04d4c4efae5e5dfdc88ddb76f5 100644 (file)
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -204,17 +204,16 @@ define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
  ; shuffle instruction when the shuffle indexes are not compatible.
  
  define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test17:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    xorps %xmm2, %xmm2
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
-; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
-; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; CHECK-NEXT:    orps %xmm1, %xmm2
-; CHECK-NEXT:    movaps %xmm2, %xmm0
-; CHECK-NEXT:    retq
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
+; CHECK-LABEL: test17:\r
+; CHECK:       # BB#0:\r
+; CHECK-NEXT:    xorps %xmm2, %xmm2\r
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]\r
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]\r
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero\r
+; CHECK-NEXT:    orps %xmm2, %xmm0\r
+; CHECK-NEXT:    retq\r
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>\r
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>\r
    %or = or <4 x i32> %shuf1, %shuf2
    ret <4 x i32> %or
  }
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll

index eec703f36708f99416cea04762792969e0bb7656..933b0111efcb5fdaecca3b833e98ae084751825e 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -460,27 +460,25 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
  ; SSE2:       # BB#0:
  ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
  ; SSE2-NEXT:    packuswb %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
+; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
  ; SSE2-NEXT:    retq
  ;
  ; SSSE3-LABEL: PR20540:
  ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
  ; SSSE3-NEXT:    retq
  ;
  ; SSE41-LABEL: PR20540:
  ; SSE41:       # BB#0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
  ; SSE41-NEXT:    retq
  ;
  ; AVX-LABEL: PR20540:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
  ; AVX-NEXT:    retq
    %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    ret <16 x i8> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll

index dadbef124342a974fd49bac9cd08f123f98c0e49..882bac414b71e06aba288ec8fa0febd4f0783859 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1076,6 +1076,20 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
    ret <4 x i32> %shuffle
  }
  
+define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
+; SSE-LABEL: shuffle_v4i32_01zu:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_01zu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
+  ret <4 x i32> %shuffle
+}
+
  define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
  ; SSE-LABEL: insert_reg_and_zero_v4i32:
  ; SSE:       # BB#0:
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll

index 9d09c31b9f817b65a5449a22259aca7e338c5017..4eb4d626aa47f066c83e1730b88f39d6102498aa 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1917,3 +1917,17 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
    %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    ret <8 x i16> %shuffle
  }
+
+define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_01u3zzuz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_01u3zzuz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
+  ret <8 x i16> %shuffle
+}
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 3 Feb 2015 20:09:18 +0000 (20:09 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 3 Feb 2015 20:09:18 +0000 (20:09 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/combine-or.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v16.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v4.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v8.ll		patch | blob | history