Tidy up this testcase and add test for tailcall optimization

[oota-llvm.git] / test / CodeGen / X86 / vec_shuffle-3.ll
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index a1eac1f1ec1b24cf6bb5a9ee58b675542f9860c5..f4930b084504dcf41bff6dfaf74b128c9cfd7dbf 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -1,19 +1,20 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlhps | wc -l | grep 1 &&
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movhlps | wc -l | grep 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movlhps %t | count 1
+; RUN: grep movhlps %t | count 1
 
-<4 x float> %test1(<4 x float>* %x, <4 x float>* %y) {
-       %tmp = load <4 x float>* %y
-       %tmp5 = load <4 x float>* %x
-       %tmp9 = add <4 x float> %tmp5, %tmp
-       %tmp21 = sub <4 x float> %tmp5, %tmp
-       %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x uint> < uint 0, uint 1, uint 4, uint 5 >
-       ret <4 x float> %tmp27
+define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
+        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
+        %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
+        %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
+        %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
+        %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp27
 }
 
-<4 x float> %movhl(<4 x float>* %x, <4 x float>* %y) {
+define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) {
 entry:
-       %tmp = load <4 x float>* %y
-       %tmp3 = load <4 x float>* %x
-       %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x uint> < uint 2, uint 3, uint 6, uint 7 >
-       ret <4 x float> %tmp4
+        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
+        %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp4
 }