-; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
-; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 1
-; RUN: grep movd %t1 | count 4
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
define <4 x i16> @a(i32* %x1) nounwind {
+; CHECK-LABEL: a:
+; CHECK: shrl %[[R:[^,]+]]
+; CHECK-NEXT: movd %[[R]], %xmm0
+; CHECK-NEXT: retl
+
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
}
define <8 x i16> @b(i32* %x1) nounwind {
+; CHECK-LABEL: b:
+; CHECK: shrl %e[[R:.]]x
+; CHECK-NEXT: movzwl %[[R]]x, %e[[R]]x
+; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK-NEXT: retl
+
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
}
define <8 x i8> @c(i32* %x1) nounwind {
+; CHECK-LABEL: c:
+; CHECK: shrl %e[[R:.]]x
+; CHECK-NEXT: movzwl %[[R]]x, %e[[R]]x
+; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK-NEXT: retl
+
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
}
define <16 x i8> @d(i32* %x1) nounwind {
+; CHECK-LABEL: d:
+; CHECK: shrl %e[[R:.]]x
+; CHECK-NEXT: movzbl %[[R]]l, %e[[R]]x
+; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK-NEXT: retl
+
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -o %t
-; RUN: grep pshufd %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | FileCheck %s
-define <4 x float> @test(float %a) nounwind {
- %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 ; <<4 x float>> [#uses=1]
- %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp6
+define <4 x float> @test(float %a) {
+; CHECK-LABEL: test:
+; CHECK: movss {{.*}}, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT: retl
+
+entry:
+ %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1
+ %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+ ret <4 x float> %tmp6
}
-define <2 x i64> @test2(i32 %a) nounwind {
- %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2 ; <<4 x i32>> [#uses=1]
- %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3 ; <<4 x i32>> [#uses=1]
- %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %tmp10
+define <2 x i64> @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK: movd {{.*}}, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; CHECK-NEXT: retl
+
+entry:
+ %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2
+ %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3
+ %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
+ ret <2 x i64> %tmp10
}
+define <4 x float> @test3(<4 x float> %A) {
+; CHECK-LABEL: test3:
+; CHECK: xorps %[[X1:xmm[0-9]+]], %[[X1]]
+; CHECK-NEXT: movss %xmm0, %[[X1]]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = [[X1]][1,0,1,1]
+; CHECK-NEXT: retl
+
+ %tmp0 = extractelement <4 x float> %A, i32 0
+ %tmp1 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef >, float %tmp0, i32 1
+ %tmp2 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 2
+ ret <4 x float> %tmp2
+}
+++ /dev/null
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
-
-define fastcc void @t(<4 x float> %A) nounwind {
- %tmp41896 = extractelement <4 x float> %A, i32 0 ; <float> [#uses=1]
- %tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1 ; <<4 x float>> [#uses=1]
- %tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp14083, <4 x float>* null, align 16
- ret void
-}