  ArrayRef<int> Mask = SVOp->getMask();
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
+                                                         Mask, Subtarget, DAG))
+    return ZExt;
+
  int NumV2Elements =
      std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
                       getV4X86ShuffleImm8ForMask(Mask, DAG));
  }
-  // Whenever we can lower this as a zext, that instruction is strictly faster
-  // than any alternative.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
-                                                         Mask, Subtarget, DAG))
-    return ZExt;
-
  // Use dedicated unpack instructions for masks that match their pattern.
  if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
}
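As an aside on the "fold memory operands" point in the new comment, here is a sketch of the kind of case it is aimed at. It is not taken from this patch's test changes: the function name is made up, the IR uses the older typed-pointer load syntax of this test file's era, and the exact SSE4.1 output would need to be confirmed with a real llc run. The expectation is that the <0,u,1,u> shuffle of a loaded vector becomes a zero/any-extend whose load can be folded, along the lines of pmovzxdq (%rdi), %xmm0 instead of a separate load plus pshufd.

define <4 x i32> @shuffle_v4i32_0u1u_load(<4 x i32>* %p) {
  ; Hypothetical case: the shuffled vector comes straight from memory, so the
  ; zext-style lowering has a load it can try to fold into the extend.
  %a = load <4 x i32>* %p
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
  ret <4 x i32> %shuffle
}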
define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0u1u:
-; SSE: # BB#0:
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_0u1u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_0u1u:
+; SSE3: # BB#0:
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0u1u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0u1u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0u1u:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
ret <4 x i32> %shuffle