fix the buildvector->insertp[sd] logic to not always create a redundant insert into element 0 (e.g. "insertps $0", which is always pointless)

author Chris Lattner <sabre@nondot.org>

Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)

committer Chris Lattner <sabre@nondot.org>

Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)
author Chris Lattner <sabre@nondot.org>
Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)
committer Chris Lattner <sabre@nondot.org>
Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 81416341658d9b386e414e04f577edd781ab43b1..bffa406d84c0fba9fe812680c9a11fc0dfe68c31 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4278,14 +4278,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
      if (LD.getNode())
        return LD;
      
-    // For SSE 4.1, use inserts into undef.  
+    // For SSE 4.1, use insertps to put the high elements into the low element. 
      if (getSubtarget()->hasSSE41()) {
-      V[0] = DAG.getUNDEF(VT);
-      for (unsigned i = 0; i < NumElems; ++i)
-        if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
-          V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+      SDValue Result;
+      if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+        Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+      else
+        Result = DAG.getUNDEF(VT);
+      
+      for (unsigned i = 1; i < NumElems; ++i) {
+        if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
                               Op.getOperand(i), DAG.getIntPtrConstant(i));
-      return V[0];
+      }
+      return Result;
      }
      
      // Otherwise, expand into a number of unpckl*, start by extending each of
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll

index 4a97ac35afc7feddcf70f99fac3a24b017810f3c..bb01e5afceff14deb701a8b1fa96e5a4c7f23ba3 100644 (file)
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
  ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
  ; rdar://6627786
  ; rdar://7792037
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll

index ef66d1a44a187862ed99a473117b805c83e6a701..3a14fa26300c3dc86ca52412bae4d12e77ee9cc1 100644 (file)
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
  declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
  declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
  
+; This used to compile to insertps $0  + insertps $16.  insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind  {
+entry:
+  %tmp7 = extractelement <2 x float> %A, i32 0
+  %tmp5 = extractelement <2 x float> %A, i32 1
+  %tmp3 = extractelement <2 x float> %B, i32 0
+  %tmp1 = extractelement <2 x float> %B, i32 1
+  %add.r = fadd float %tmp7, %tmp3
+  %add.i = fadd float %tmp5, %tmp1
+  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+  ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll

index 2e829df1f8dfd2bef497c56abe26850ea9b1ba70..e5a7ccc5ef94f0fe54d49b49b5fe841bfa805d4a 100644 (file)
--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
  
  define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
  entry:
author	Chris Lattner <sabre@nondot.org>
	Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Sat, 28 Aug 2010 17:59:08 +0000 (17:59 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/2009-02-26-MachineLICMBug.ll		patch \| blob \| history
test/CodeGen/X86/sse41.ll		patch \| blob \| history
test/CodeGen/X86/vec_insert-9.ll		patch \| blob \| history