1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
2 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
; test1: reinterpret the double argument %A as a <2 x i32> vector, add the
; constant vector <3, 5> lane-wise, and reinterpret the sum as a double.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
5 define double @test1(double %A) {
6 %1 = bitcast double %A to <2 x i32>
7 %add = add <2 x i32> %1, <i32 3, i32 5>
8 %2 = bitcast <2 x i32> %add to double
11 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
12 ; single paddd instruction. At the moment we produce the sequence
13 ; pshufd+paddq+pshufd. This is fixed with the widening legalization.
22 ; CHECK-WIDE-LABEL: test1
23 ; CHECK-WIDE-NOT: movsd
25 ; CHECK-WIDE-NEXT: ret
; test2: reinterpret both double arguments as <2 x i32> vectors, add them
; lane-wise, and reinterpret the sum as a double. Unlike test1, both add
; operands come from function arguments rather than a constant.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
28 define double @test2(double %A, double %B) {
29 %1 = bitcast double %A to <2 x i32>
30 %2 = bitcast double %B to <2 x i32>
31 %add = add <2 x i32> %1, %2
32 %3 = bitcast <2 x i32> %add to double
40 ; CHECK-WIDE-LABEL: test2
41 ; CHECK-WIDE-NOT: movsd
43 ; CHECK-WIDE-NEXT: ret
; test3: reinterpret the i64 argument %A as a <2 x float> vector, perform a
; lane-wise floating-point add with the constant <3.0, 5.0>, and reinterpret
; the result as an i64.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
46 define i64 @test3(i64 %A) {
47 %1 = bitcast i64 %A to <2 x float>
48 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
49 %2 = bitcast <2 x float> %add to i64
58 ; CHECK-WIDE-LABEL: test3
59 ; CHECK-WIDE-NOT: pshufd
61 ; CHECK-WIDE-NOT: pshufd
; test4: integer counterpart of test3. Reinterpret the i64 argument %A as a
; <2 x i32> vector, add the constant vector <3, 5> lane-wise, and reinterpret
; the sum as an i64.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
65 define i64 @test4(i64 %A) {
66 %1 = bitcast i64 %A to <2 x i32>
67 %add = add <2 x i32> %1, <i32 3, i32 5>
68 %2 = bitcast <2 x i32> %add to i64
71 ; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
72 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
73 ; the widening legalization.
81 ; CHECK-WIDE-LABEL: test4
82 ; CHECK-WIDE: movd %{{rdi|rcx}},
83 ; CHECK-WIDE-NEXT: paddd
84 ; CHECK-WIDE-NEXT: movd {{.*}}, %rax
; test5: reinterpret the double argument %A as a <2 x float> vector, perform a
; lane-wise floating-point add with the constant <3.0, 5.0>, and reinterpret
; the result as a double.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
88 define double @test5(double %A) {
89 %1 = bitcast double %A to <2 x float>
90 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
91 %2 = bitcast <2 x float> %add to double
98 ; CHECK-WIDE-LABEL: test5
100 ; CHECK-WIDE-NEXT: ret
; test6: reinterpret the double argument %A as a <4 x i16> vector, add the
; constant vector <3, 4, 5, 6> lane-wise, and reinterpret the sum as a double.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
103 define double @test6(double %A) {
104 %1 = bitcast double %A to <4 x i16>
105 %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
106 %2 = bitcast <4 x i16> %add to double
109 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
110 ; single paddw instruction. This is fixed with the widening legalization.
119 ; CHECK-WIDE-LABEL: test6
120 ; CHECK-WIDE-NOT: mov
121 ; CHECK-WIDE-NOT: punpcklwd
123 ; CHECK-WIDE-NEXT: ret
; test7: reinterpret both double arguments as <4 x i16> vectors, add them
; lane-wise, and reinterpret the sum as a double. Unlike test6, both add
; operands come from function arguments rather than a constant.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
126 define double @test7(double %A, double %B) {
127 %1 = bitcast double %A to <4 x i16>
128 %2 = bitcast double %B to <4 x i16>
129 %add = add <4 x i16> %1, %2
130 %3 = bitcast <4 x i16> %add to double
135 ; CHECK-NOT: punpcklwd
139 ; CHECK-WIDE-LABEL: test7
140 ; CHECK-WIDE-NOT: movsd
141 ; CHECK-WIDE-NOT: punpcklwd
143 ; CHECK-WIDE-NEXT: ret
; test8: reinterpret the double argument %A as an <8 x i8> vector, add the
; constant vector <3, 4, 5, 6, 7, 8, 9, 10> lane-wise, and reinterpret the sum
; as a double.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
146 define double @test8(double %A) {
147 %1 = bitcast double %A to <8 x i8>
148 %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
149 %2 = bitcast <8 x i8> %add to double
152 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
153 ; single paddb instruction. At the moment we produce the sequence
154 ; pshufd+paddw+pshufd. This is fixed with the widening legalization.
163 ; CHECK-WIDE-LABEL: test8
164 ; CHECK-WIDE-NOT: movsd
165 ; CHECK-WIDE-NOT: punpcklbw
167 ; CHECK-WIDE-NEXT: ret
; test9: reinterpret both double arguments as <8 x i8> vectors, add them
; lane-wise, and reinterpret the sum as a double. Unlike test8, both add
; operands come from function arguments rather than a constant.
; NOTE(review): the terminator and closing brace of this definition are not
; visible in this excerpt.
170 define double @test9(double %A, double %B) {
171 %1 = bitcast double %A to <8 x i8>
172 %2 = bitcast double %B to <8 x i8>
173 %add = add <8 x i8> %1, %2
174 %3 = bitcast <8 x i8> %add to double
179 ; CHECK-NOT: punpcklbw
183 ; CHECK-WIDE-LABEL: test9
184 ; CHECK-WIDE-NOT: movsd
185 ; CHECK-WIDE-NOT: punpcklbw
187 ; CHECK-WIDE-NEXT: ret