1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
2 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
; test1: reinterpret the double argument as <2 x i32>, add the constant
; vector <3, 5> lane-wise, and reinterpret the sum as a double (%2).
5 define double @test1(double %A) {
6 %1 = bitcast double %A to <2 x i32>
7 %add = add <2 x i32> %1, <i32 3, i32 5>
8 %2 = bitcast <2 x i32> %add to double
11 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
12 ; single paddd instruction. At the moment we produce the sequence
13 ; pshufd+paddq+pshufd. This is fixed with the widening legalization.
; Widening-legalization run: after the test1 label no movsd may appear before
; the next positive match, and the ret must sit on the line directly after the
; preceding matched line.
22 ; CHECK-WIDE-LABEL: test1
23 ; CHECK-WIDE-NOT: movsd
25 ; CHECK-WIDE-NEXT: ret
; test2: reinterpret both double arguments as <2 x i32>, add them lane-wise,
; and reinterpret the sum as a double (%3). Unlike test1, neither operand is a
; constant.
28 define double @test2(double %A, double %B) {
29 %1 = bitcast double %A to <2 x i32>
30 %2 = bitcast double %B to <2 x i32>
31 %add = add <2 x i32> %1, %2
32 %3 = bitcast <2 x i32> %add to double
; Widening-legalization run: no movsd may appear after the test2 label before
; the next positive match; the ret must directly follow the preceding matched
; line.
40 ; CHECK-WIDE-LABEL: test2
41 ; CHECK-WIDE-NOT: movsd
43 ; CHECK-WIDE-NEXT: ret
; test3: reinterpret the i64 argument as <2 x float>, add the constant vector
; <3.0, 5.0> lane-wise with fadd, and reinterpret the sum as an i64 (%2).
46 define i64 @test3(i64 %A) {
47 %1 = bitcast i64 %A to <2 x float>
48 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
49 %2 = bitcast <2 x float> %add to i64
; Widening-legalization run: pshufd is forbidden in the checked regions that
; follow the test3 label (the exclusion is stated twice).
58 ; CHECK-WIDE-LABEL: test3
59 ; CHECK-WIDE-NOT: pshufd
61 ; CHECK-WIDE-NOT: pshufd
; test4: reinterpret the i64 argument as <2 x i32>, add the constant vector
; <3, 5> lane-wise, and reinterpret the sum as an i64 (%2). This is the
; integer-register counterpart of test1.
65 define i64 @test4(i64 %A) {
66 %1 = bitcast i64 %A to <2 x i32>
67 %add = add <2 x i32> %1, <i32 3, i32 5>
68 %2 = bitcast <2 x i32> %add to i64
71 ; FIXME: At the moment we still produce the sequence paddd+pshufd.
72 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
73 ; the widening legalization.
; Widening-legalization run: expects a movd whose source is %rdi or %rcx,
; then a paddd on the very next line, then a movd into %rax on the line after
; that -- i.e. no shuffle between the three instructions.
82 ; CHECK-WIDE-LABEL: test4
83 ; CHECK-WIDE: movd %{{rdi|rcx}},
84 ; CHECK-WIDE-NEXT: paddd
85 ; CHECK-WIDE-NEXT: movd {{.*}}, %rax
; test5: reinterpret the double argument as <2 x float>, add the constant
; vector <3.0, 5.0> lane-wise with fadd, and reinterpret the sum as a double
; (%2).
89 define double @test5(double %A) {
90 %1 = bitcast double %A to <2 x float>
91 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
92 %2 = bitcast <2 x float> %add to double
; Widening-legalization run: the ret must sit on the line directly after the
; preceding matched line of test5.
99 ; CHECK-WIDE-LABEL: test5
101 ; CHECK-WIDE-NEXT: ret
; test6: reinterpret the double argument as <4 x i16>, add the constant
; vector <3, 4, 5, 6> lane-wise, and reinterpret the sum as a double (%2).
104 define double @test6(double %A) {
105 %1 = bitcast double %A to <4 x i16>
106 %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
107 %2 = bitcast <4 x i16> %add to double
110 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
111 ; single paddw instruction. This is fixed with the widening legalization.
; Widening-legalization run: neither a "mov" substring nor punpcklwd may
; appear after the test6 label before the next positive match; the ret must
; directly follow the preceding matched line.
120 ; CHECK-WIDE-LABEL: test6
121 ; CHECK-WIDE-NOT: mov
122 ; CHECK-WIDE-NOT: punpcklwd
124 ; CHECK-WIDE-NEXT: ret
; test7: reinterpret both double arguments as <4 x i16>, add them lane-wise,
; and reinterpret the sum as a double (%3).
127 define double @test7(double %A, double %B) {
128 %1 = bitcast double %A to <4 x i16>
129 %2 = bitcast double %B to <4 x i16>
130 %add = add <4 x i16> %1, %2
131 %3 = bitcast <4 x i16> %add to double
; Default run: punpcklwd must not appear in the checked region.
136 ; CHECK-NOT: punpcklwd
; Widening-legalization run: neither movsd nor punpcklwd may appear after the
; test7 label before the next positive match; the ret must directly follow the
; preceding matched line.
140 ; CHECK-WIDE-LABEL: test7
141 ; CHECK-WIDE-NOT: movsd
142 ; CHECK-WIDE-NOT: punpcklwd
144 ; CHECK-WIDE-NEXT: ret
; test8: reinterpret the double argument as <8 x i8>, add the constant vector
; <3, 4, 5, 6, 7, 8, 9, 10> lane-wise, and reinterpret the sum as a double
; (%2).
147 define double @test8(double %A) {
148 %1 = bitcast double %A to <8 x i8>
149 %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
150 %2 = bitcast <8 x i8> %add to double
153 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
154 ; single paddb instruction. At the moment we produce the sequence
155 ; pshufd+paddw+pshufd. This is fixed with the widening legalization.
; Widening-legalization run: neither movsd nor punpcklbw may appear after the
; test8 label before the next positive match; the ret must directly follow the
; preceding matched line.
164 ; CHECK-WIDE-LABEL: test8
165 ; CHECK-WIDE-NOT: movsd
166 ; CHECK-WIDE-NOT: punpcklbw
168 ; CHECK-WIDE-NEXT: ret
; test9: reinterpret both double arguments as <8 x i8>, add them lane-wise,
; and reinterpret the sum as a double (%3).
171 define double @test9(double %A, double %B) {
172 %1 = bitcast double %A to <8 x i8>
173 %2 = bitcast double %B to <8 x i8>
174 %add = add <8 x i8> %1, %2
175 %3 = bitcast <8 x i8> %add to double
; Default run: punpcklbw must not appear in the checked region.
180 ; CHECK-NOT: punpcklbw
; Widening-legalization run: neither movsd nor punpcklbw may appear after the
; test9 label before the next positive match; the ret must directly follow the
; preceding matched line.
184 ; CHECK-WIDE-LABEL: test9
185 ; CHECK-WIDE-NOT: movsd
186 ; CHECK-WIDE-NOT: punpcklbw
188 ; CHECK-WIDE-NEXT: ret