1 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
2 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
3 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s
5 ; Ensure that the backend no longer emits unnecessary vector insert
6 ; instructions immediately after SSE scalar fp instructions
; Scalar float add on lane 0: computes a[0] + b[0] and inserts the sum into
; lane 0 of %a, so the remaining lanes of the result come from %a.
; The expectations that follow look for addss / vaddss taking %xmm1 into %xmm0.
10 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
11 %1 = extractelement <4 x float> %b, i32 0
12 %2 = extractelement <4 x float> %a, i32 0
13 %add = fadd float %2, %1
14 %3 = insertelement <4 x float> %a, float %add, i32 0
18 ; CHECK-LABEL: test_add_ss
19 ; SSE2: addss %xmm1, %xmm0
20 ; AVX: vaddss %xmm1, %xmm0, %xmm0
; Scalar float subtract on lane 0: computes a[0] - b[0] and inserts the
; difference into lane 0 of %a, so the remaining lanes come from %a.
; The expectations that follow look for subss / vsubss taking %xmm1 into %xmm0.
25 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
26 %1 = extractelement <4 x float> %b, i32 0
27 %2 = extractelement <4 x float> %a, i32 0
28 %sub = fsub float %2, %1
29 %3 = insertelement <4 x float> %a, float %sub, i32 0
33 ; CHECK-LABEL: test_sub_ss
34 ; SSE2: subss %xmm1, %xmm0
35 ; AVX: vsubss %xmm1, %xmm0, %xmm0
; Scalar float multiply on lane 0: computes a[0] * b[0] and inserts the
; product into lane 0 of %a, so the remaining lanes come from %a.
; The expectations that follow look for mulss / vmulss taking %xmm1 into %xmm0.
39 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
40 %1 = extractelement <4 x float> %b, i32 0
41 %2 = extractelement <4 x float> %a, i32 0
42 %mul = fmul float %2, %1
43 %3 = insertelement <4 x float> %a, float %mul, i32 0
47 ; CHECK-LABEL: test_mul_ss
48 ; SSE2: mulss %xmm1, %xmm0
49 ; AVX: vmulss %xmm1, %xmm0, %xmm0
; Scalar float divide on lane 0: computes a[0] / b[0] and inserts the
; quotient into lane 0 of %a, so the remaining lanes come from %a.
; The expectations that follow look for divss / vdivss taking %xmm1 into %xmm0.
54 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
55 %1 = extractelement <4 x float> %b, i32 0
56 %2 = extractelement <4 x float> %a, i32 0
57 %div = fdiv float %2, %1
58 %3 = insertelement <4 x float> %a, float %div, i32 0
62 ; CHECK-LABEL: test_div_ss
63 ; SSE2: divss %xmm1, %xmm0
64 ; AVX: vdivss %xmm1, %xmm0, %xmm0
; Scalar double add on lane 0: computes a[0] + b[0] and inserts the sum into
; lane 0 of %a, so the upper lane of the result comes from %a.
; The expectations that follow look for addsd / vaddsd taking %xmm1 into %xmm0.
69 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
70 %1 = extractelement <2 x double> %b, i32 0
71 %2 = extractelement <2 x double> %a, i32 0
72 %add = fadd double %2, %1
73 %3 = insertelement <2 x double> %a, double %add, i32 0
77 ; CHECK-LABEL: test_add_sd
78 ; SSE2: addsd %xmm1, %xmm0
79 ; AVX: vaddsd %xmm1, %xmm0, %xmm0
; Scalar double subtract on lane 0: computes a[0] - b[0] and inserts the
; difference into lane 0 of %a, so the upper lane comes from %a.
; The expectations that follow look for subsd / vsubsd taking %xmm1 into %xmm0.
84 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
85 %1 = extractelement <2 x double> %b, i32 0
86 %2 = extractelement <2 x double> %a, i32 0
87 %sub = fsub double %2, %1
88 %3 = insertelement <2 x double> %a, double %sub, i32 0
92 ; CHECK-LABEL: test_sub_sd
93 ; SSE2: subsd %xmm1, %xmm0
94 ; AVX: vsubsd %xmm1, %xmm0, %xmm0
; Scalar double multiply on lane 0: computes a[0] * b[0] and inserts the
; product into lane 0 of %a, so the upper lane comes from %a.
; The expectations that follow look for mulsd / vmulsd taking %xmm1 into %xmm0.
99 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
100 %1 = extractelement <2 x double> %b, i32 0
101 %2 = extractelement <2 x double> %a, i32 0
102 %mul = fmul double %2, %1
103 %3 = insertelement <2 x double> %a, double %mul, i32 0
107 ; CHECK-LABEL: test_mul_sd
108 ; SSE2: mulsd %xmm1, %xmm0
109 ; AVX: vmulsd %xmm1, %xmm0, %xmm0
; Scalar double divide on lane 0: computes a[0] / b[0] and inserts the
; quotient into lane 0 of %a, so the upper lane comes from %a.
; The expectations that follow look for divsd / vdivsd taking %xmm1 into %xmm0.
114 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
115 %1 = extractelement <2 x double> %b, i32 0
116 %2 = extractelement <2 x double> %a, i32 0
117 %div = fdiv double %2, %1
118 %3 = insertelement <2 x double> %a, double %div, i32 0
122 ; CHECK-LABEL: test_div_sd
123 ; SSE2: divsd %xmm1, %xmm0
124 ; AVX: vdivsd %xmm1, %xmm0, %xmm0
; Variant of the scalar float add where the result is written into %b rather
; than %a: computes a[0] + b[0] and inserts it into lane 0 of %b, so the
; remaining lanes of the result come from %b.
; The expectations that follow look for addss %xmm0, %xmm1 (result in the
; register holding %b) and the three-operand vaddss form for AVX.
129 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
130 %1 = extractelement <4 x float> %a, i32 0
131 %2 = extractelement <4 x float> %b, i32 0
132 %add = fadd float %1, %2
133 %3 = insertelement <4 x float> %b, float %add, i32 0
137 ; CHECK-LABEL: test2_add_ss
138 ; SSE2: addss %xmm0, %xmm1
139 ; AVX: vaddss %xmm0, %xmm1, %xmm0
; Variant of the scalar float subtract where the result is written into %b:
; computes b[0] - a[0] (note the operand order; fsub is not commutative) and
; inserts it into lane 0 of %b, so the remaining lanes come from %b.
; The expectations that follow look for subss %xmm0, %xmm1 and the
; three-operand vsubss form for AVX.
144 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
145 %1 = extractelement <4 x float> %a, i32 0
146 %2 = extractelement <4 x float> %b, i32 0
147 %sub = fsub float %2, %1
148 %3 = insertelement <4 x float> %b, float %sub, i32 0
152 ; CHECK-LABEL: test2_sub_ss
153 ; SSE2: subss %xmm0, %xmm1
154 ; AVX: vsubss %xmm0, %xmm1, %xmm0
; Variant of the scalar float multiply where the result is written into %b:
; computes a[0] * b[0] and inserts it into lane 0 of %b, so the remaining
; lanes of the result come from %b.
; The expectations that follow look for mulss %xmm0, %xmm1 and the
; three-operand vmulss form for AVX.
159 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
160 %1 = extractelement <4 x float> %a, i32 0
161 %2 = extractelement <4 x float> %b, i32 0
162 %mul = fmul float %1, %2
163 %3 = insertelement <4 x float> %b, float %mul, i32 0
167 ; CHECK-LABEL: test2_mul_ss
168 ; SSE2: mulss %xmm0, %xmm1
169 ; AVX: vmulss %xmm0, %xmm1, %xmm0
; Variant of the scalar float divide where the result is written into %b:
; computes b[0] / a[0] (note the operand order; fdiv is not commutative) and
; inserts it into lane 0 of %b, so the remaining lanes come from %b.
; The expectations that follow look for divss %xmm0, %xmm1 and the
; three-operand vdivss form for AVX.
174 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
175 %1 = extractelement <4 x float> %a, i32 0
176 %2 = extractelement <4 x float> %b, i32 0
177 %div = fdiv float %2, %1
178 %3 = insertelement <4 x float> %b, float %div, i32 0
182 ; CHECK-LABEL: test2_div_ss
183 ; SSE2: divss %xmm0, %xmm1
184 ; AVX: vdivss %xmm0, %xmm1, %xmm0
; Variant of the scalar double add where the result is written into %b:
; computes a[0] + b[0] and inserts it into lane 0 of %b, so the upper lane
; of the result comes from %b.
; The expectations that follow look for addsd %xmm0, %xmm1 and the
; three-operand vaddsd form for AVX.
189 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
190 %1 = extractelement <2 x double> %a, i32 0
191 %2 = extractelement <2 x double> %b, i32 0
192 %add = fadd double %1, %2
193 %3 = insertelement <2 x double> %b, double %add, i32 0
197 ; CHECK-LABEL: test2_add_sd
198 ; SSE2: addsd %xmm0, %xmm1
199 ; AVX: vaddsd %xmm0, %xmm1, %xmm0
; Variant of the scalar double subtract where the result is written into %b:
; computes b[0] - a[0] (note the operand order; fsub is not commutative) and
; inserts it into lane 0 of %b, so the upper lane comes from %b.
; The expectations that follow look for subsd %xmm0, %xmm1 and the
; three-operand vsubsd form for AVX.
204 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
205 %1 = extractelement <2 x double> %a, i32 0
206 %2 = extractelement <2 x double> %b, i32 0
207 %sub = fsub double %2, %1
208 %3 = insertelement <2 x double> %b, double %sub, i32 0
212 ; CHECK-LABEL: test2_sub_sd
213 ; SSE2: subsd %xmm0, %xmm1
214 ; AVX: vsubsd %xmm0, %xmm1, %xmm0
; Variant of the scalar double multiply where the result is written into %b:
; computes a[0] * b[0] and inserts it into lane 0 of %b, so the upper lane
; of the result comes from %b.
; The expectations that follow look for mulsd %xmm0, %xmm1 and the
; three-operand vmulsd form for AVX.
219 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
220 %1 = extractelement <2 x double> %a, i32 0
221 %2 = extractelement <2 x double> %b, i32 0
222 %mul = fmul double %1, %2
223 %3 = insertelement <2 x double> %b, double %mul, i32 0
227 ; CHECK-LABEL: test2_mul_sd
228 ; SSE2: mulsd %xmm0, %xmm1
229 ; AVX: vmulsd %xmm0, %xmm1, %xmm0
; Variant of the scalar double divide where the result is written into %b:
; computes b[0] / a[0] (note the operand order; fdiv is not commutative) and
; inserts it into lane 0 of %b, so the upper lane comes from %b.
; The expectations that follow look for divsd %xmm0, %xmm1 and the
; three-operand vdivsd form for AVX.
234 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
235 %1 = extractelement <2 x double> %a, i32 0
236 %2 = extractelement <2 x double> %b, i32 0
237 %div = fdiv double %2, %1
238 %3 = insertelement <2 x double> %b, double %div, i32 0
242 ; CHECK-LABEL: test2_div_sd
243 ; SSE2: divsd %xmm0, %xmm1
244 ; AVX: vdivsd %xmm0, %xmm1, %xmm0
; Two chained scalar float adds on lane 0: first a[0] + b[0], then
; a[0] + (a[0] + b[0]); only the second result is inserted into lane 0 of %a.
; NOTE(review): presumably this verifies that a chain of scalar ops still
; avoids the extra vector insert; confirm against the expectations that
; follow the function.
249 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
250 %1 = extractelement <4 x float> %b, i32 0
251 %2 = extractelement <4 x float> %a, i32 0
252 %add = fadd float %2, %1
253 %add2 = fadd float %2, %add
254 %3 = insertelement <4 x float> %a, float %add2, i32 0
258 ; CHECK-LABEL: test_multiple_add_ss
; Two chained scalar float subtracts on lane 0: first a[0] - b[0], then
; a[0] - (a[0] - b[0]); only the second result is inserted into lane 0 of %a.
; NOTE(review): presumably this verifies that a chain of scalar ops still
; avoids the extra vector insert; confirm against the expectations that
; follow the function.
265 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
266 %1 = extractelement <4 x float> %b, i32 0
267 %2 = extractelement <4 x float> %a, i32 0
268 %sub = fsub float %2, %1
269 %sub2 = fsub float %2, %sub
270 %3 = insertelement <4 x float> %a, float %sub2, i32 0
274 ; CHECK-LABEL: test_multiple_sub_ss
; Two chained scalar float multiplies on lane 0: first a[0] * b[0], then
; a[0] * (a[0] * b[0]); only the second result is inserted into lane 0 of %a.
; NOTE(review): presumably this verifies that a chain of scalar ops still
; avoids the extra vector insert; confirm against the expectations that
; follow the function.
281 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
282 %1 = extractelement <4 x float> %b, i32 0
283 %2 = extractelement <4 x float> %a, i32 0
284 %mul = fmul float %2, %1
285 %mul2 = fmul float %2, %mul
286 %3 = insertelement <4 x float> %a, float %mul2, i32 0
290 ; CHECK-LABEL: test_multiple_mul_ss
; Two chained scalar float divides on lane 0: first a[0] / b[0], then
; a[0] / (a[0] / b[0]); only the second result is inserted into lane 0 of %a.
; NOTE(review): presumably this verifies that a chain of scalar ops still
; avoids the extra vector insert; confirm against the expectations that
; follow the function.
296 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
297 %1 = extractelement <4 x float> %b, i32 0
298 %2 = extractelement <4 x float> %a, i32 0
299 %div = fdiv float %2, %1
300 %div2 = fdiv float %2, %div
301 %3 = insertelement <4 x float> %a, float %div2, i32 0
305 ; CHECK-LABEL: test_multiple_div_ss