1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
2 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
3 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
5 target triple = "x86_64-unknown-unknown"
7 ; Ensure that the backend no longer emits unnecessary vector insert
8 ; instructions immediately after SSE scalar fp instructions
11 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
12 ; SSE-LABEL: test_add_ss:
14 ; SSE-NEXT: addss %xmm1, %xmm0
17 ; AVX-LABEL: test_add_ss:
19 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
21 %1 = extractelement <4 x float> %b, i32 0
22 %2 = extractelement <4 x float> %a, i32 0
23 %add = fadd float %2, %1
24 %3 = insertelement <4 x float> %a, float %add, i32 0
28 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
29 ; SSE-LABEL: test_sub_ss:
31 ; SSE-NEXT: subss %xmm1, %xmm0
34 ; AVX-LABEL: test_sub_ss:
36 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
38 %1 = extractelement <4 x float> %b, i32 0
39 %2 = extractelement <4 x float> %a, i32 0
40 %sub = fsub float %2, %1
41 %3 = insertelement <4 x float> %a, float %sub, i32 0
45 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
46 ; SSE-LABEL: test_mul_ss:
48 ; SSE-NEXT: mulss %xmm1, %xmm0
51 ; AVX-LABEL: test_mul_ss:
53 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
55 %1 = extractelement <4 x float> %b, i32 0
56 %2 = extractelement <4 x float> %a, i32 0
57 %mul = fmul float %2, %1
58 %3 = insertelement <4 x float> %a, float %mul, i32 0
62 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
63 ; SSE-LABEL: test_div_ss:
65 ; SSE-NEXT: divss %xmm1, %xmm0
68 ; AVX-LABEL: test_div_ss:
70 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
72 %1 = extractelement <4 x float> %b, i32 0
73 %2 = extractelement <4 x float> %a, i32 0
74 %div = fdiv float %2, %1
75 %3 = insertelement <4 x float> %a, float %div, i32 0
79 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
80 ; SSE-LABEL: test_add_sd:
82 ; SSE-NEXT: addsd %xmm1, %xmm0
85 ; AVX-LABEL: test_add_sd:
87 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
89 %1 = extractelement <2 x double> %b, i32 0
90 %2 = extractelement <2 x double> %a, i32 0
91 %add = fadd double %2, %1
92 %3 = insertelement <2 x double> %a, double %add, i32 0
96 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
97 ; SSE-LABEL: test_sub_sd:
99 ; SSE-NEXT: subsd %xmm1, %xmm0
102 ; AVX-LABEL: test_sub_sd:
104 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
106 %1 = extractelement <2 x double> %b, i32 0
107 %2 = extractelement <2 x double> %a, i32 0
108 %sub = fsub double %2, %1
109 %3 = insertelement <2 x double> %a, double %sub, i32 0
113 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
114 ; SSE-LABEL: test_mul_sd:
116 ; SSE-NEXT: mulsd %xmm1, %xmm0
119 ; AVX-LABEL: test_mul_sd:
121 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
123 %1 = extractelement <2 x double> %b, i32 0
124 %2 = extractelement <2 x double> %a, i32 0
125 %mul = fmul double %2, %1
126 %3 = insertelement <2 x double> %a, double %mul, i32 0
130 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
131 ; SSE-LABEL: test_div_sd:
133 ; SSE-NEXT: divsd %xmm1, %xmm0
136 ; AVX-LABEL: test_div_sd:
138 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
140 %1 = extractelement <2 x double> %b, i32 0
141 %2 = extractelement <2 x double> %a, i32 0
142 %div = fdiv double %2, %1
143 %3 = insertelement <2 x double> %a, double %div, i32 0
147 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
148 ; SSE-LABEL: test2_add_ss:
150 ; SSE-NEXT: addss %xmm0, %xmm1
151 ; SSE-NEXT: movaps %xmm1, %xmm0
154 ; AVX-LABEL: test2_add_ss:
156 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
158 %1 = extractelement <4 x float> %a, i32 0
159 %2 = extractelement <4 x float> %b, i32 0
160 %add = fadd float %1, %2
161 %3 = insertelement <4 x float> %b, float %add, i32 0
165 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
166 ; SSE-LABEL: test2_sub_ss:
168 ; SSE-NEXT: subss %xmm0, %xmm1
169 ; SSE-NEXT: movaps %xmm1, %xmm0
172 ; AVX-LABEL: test2_sub_ss:
174 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
176 %1 = extractelement <4 x float> %a, i32 0
177 %2 = extractelement <4 x float> %b, i32 0
178 %sub = fsub float %2, %1
179 %3 = insertelement <4 x float> %b, float %sub, i32 0
183 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
184 ; SSE-LABEL: test2_mul_ss:
186 ; SSE-NEXT: mulss %xmm0, %xmm1
187 ; SSE-NEXT: movaps %xmm1, %xmm0
190 ; AVX-LABEL: test2_mul_ss:
192 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
194 %1 = extractelement <4 x float> %a, i32 0
195 %2 = extractelement <4 x float> %b, i32 0
196 %mul = fmul float %1, %2
197 %3 = insertelement <4 x float> %b, float %mul, i32 0
201 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
202 ; SSE-LABEL: test2_div_ss:
204 ; SSE-NEXT: divss %xmm0, %xmm1
205 ; SSE-NEXT: movaps %xmm1, %xmm0
208 ; AVX-LABEL: test2_div_ss:
210 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
212 %1 = extractelement <4 x float> %a, i32 0
213 %2 = extractelement <4 x float> %b, i32 0
214 %div = fdiv float %2, %1
215 %3 = insertelement <4 x float> %b, float %div, i32 0
219 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
220 ; SSE-LABEL: test2_add_sd:
222 ; SSE-NEXT: addsd %xmm0, %xmm1
223 ; SSE-NEXT: movaps %xmm1, %xmm0
226 ; AVX-LABEL: test2_add_sd:
228 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
230 %1 = extractelement <2 x double> %a, i32 0
231 %2 = extractelement <2 x double> %b, i32 0
232 %add = fadd double %1, %2
233 %3 = insertelement <2 x double> %b, double %add, i32 0
237 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
238 ; SSE-LABEL: test2_sub_sd:
240 ; SSE-NEXT: subsd %xmm0, %xmm1
241 ; SSE-NEXT: movaps %xmm1, %xmm0
244 ; AVX-LABEL: test2_sub_sd:
246 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
248 %1 = extractelement <2 x double> %a, i32 0
249 %2 = extractelement <2 x double> %b, i32 0
250 %sub = fsub double %2, %1
251 %3 = insertelement <2 x double> %b, double %sub, i32 0
255 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
256 ; SSE-LABEL: test2_mul_sd:
258 ; SSE-NEXT: mulsd %xmm0, %xmm1
259 ; SSE-NEXT: movaps %xmm1, %xmm0
262 ; AVX-LABEL: test2_mul_sd:
264 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
266 %1 = extractelement <2 x double> %a, i32 0
267 %2 = extractelement <2 x double> %b, i32 0
268 %mul = fmul double %1, %2
269 %3 = insertelement <2 x double> %b, double %mul, i32 0
273 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
274 ; SSE-LABEL: test2_div_sd:
276 ; SSE-NEXT: divsd %xmm0, %xmm1
277 ; SSE-NEXT: movaps %xmm1, %xmm0
280 ; AVX-LABEL: test2_div_sd:
282 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
284 %1 = extractelement <2 x double> %a, i32 0
285 %2 = extractelement <2 x double> %b, i32 0
286 %div = fdiv double %2, %1
287 %3 = insertelement <2 x double> %b, double %div, i32 0
291 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
292 ; SSE-LABEL: test_multiple_add_ss:
294 ; SSE-NEXT: addss %xmm0, %xmm1
295 ; SSE-NEXT: addss %xmm1, %xmm0
298 ; AVX-LABEL: test_multiple_add_ss:
300 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
301 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
303 %1 = extractelement <4 x float> %b, i32 0
304 %2 = extractelement <4 x float> %a, i32 0
305 %add = fadd float %2, %1
306 %add2 = fadd float %2, %add
307 %3 = insertelement <4 x float> %a, float %add2, i32 0
311 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
312 ; SSE-LABEL: test_multiple_sub_ss:
314 ; SSE-NEXT: movaps %xmm0, %xmm2
315 ; SSE-NEXT: subss %xmm1, %xmm2
316 ; SSE-NEXT: subss %xmm2, %xmm0
319 ; AVX-LABEL: test_multiple_sub_ss:
321 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
322 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
324 %1 = extractelement <4 x float> %b, i32 0
325 %2 = extractelement <4 x float> %a, i32 0
326 %sub = fsub float %2, %1
327 %sub2 = fsub float %2, %sub
328 %3 = insertelement <4 x float> %a, float %sub2, i32 0
332 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
333 ; SSE-LABEL: test_multiple_mul_ss:
335 ; SSE-NEXT: mulss %xmm0, %xmm1
336 ; SSE-NEXT: mulss %xmm1, %xmm0
339 ; AVX-LABEL: test_multiple_mul_ss:
341 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
342 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
344 %1 = extractelement <4 x float> %b, i32 0
345 %2 = extractelement <4 x float> %a, i32 0
346 %mul = fmul float %2, %1
347 %mul2 = fmul float %2, %mul
348 %3 = insertelement <4 x float> %a, float %mul2, i32 0
352 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
353 ; SSE-LABEL: test_multiple_div_ss:
355 ; SSE-NEXT: movaps %xmm0, %xmm2
356 ; SSE-NEXT: divss %xmm1, %xmm2
357 ; SSE-NEXT: divss %xmm2, %xmm0
360 ; AVX-LABEL: test_multiple_div_ss:
362 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
363 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
365 %1 = extractelement <4 x float> %b, i32 0
366 %2 = extractelement <4 x float> %a, i32 0
367 %div = fdiv float %2, %1
368 %div2 = fdiv float %2, %div
369 %3 = insertelement <4 x float> %a, float %div2, i32 0
373 ; Ensure that the backend selects SSE/AVX scalar fp instructions
374 ; from a packed fp instruction plus a vector insert.
376 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
377 ; SSE-LABEL: insert_test_add_ss:
379 ; SSE-NEXT: addss %xmm1, %xmm0
382 ; AVX-LABEL: insert_test_add_ss:
384 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
386 %1 = fadd <4 x float> %a, %b
387 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
391 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
392 ; SSE-LABEL: insert_test_sub_ss:
394 ; SSE-NEXT: subss %xmm1, %xmm0
397 ; AVX-LABEL: insert_test_sub_ss:
399 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
401 %1 = fsub <4 x float> %a, %b
402 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
406 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
407 ; SSE-LABEL: insert_test_mul_ss:
409 ; SSE-NEXT: mulss %xmm1, %xmm0
412 ; AVX-LABEL: insert_test_mul_ss:
414 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
416 %1 = fmul <4 x float> %a, %b
417 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
421 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
422 ; SSE-LABEL: insert_test_div_ss:
424 ; SSE-NEXT: divss %xmm1, %xmm0
427 ; AVX-LABEL: insert_test_div_ss:
429 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
431 %1 = fdiv <4 x float> %a, %b
432 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
436 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
437 ; SSE-LABEL: insert_test_add_sd:
439 ; SSE-NEXT: addsd %xmm1, %xmm0
442 ; AVX-LABEL: insert_test_add_sd:
444 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
446 %1 = fadd <2 x double> %a, %b
447 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
451 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
452 ; SSE-LABEL: insert_test_sub_sd:
454 ; SSE-NEXT: subsd %xmm1, %xmm0
457 ; AVX-LABEL: insert_test_sub_sd:
459 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
461 %1 = fsub <2 x double> %a, %b
462 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
466 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
467 ; SSE-LABEL: insert_test_mul_sd:
469 ; SSE-NEXT: mulsd %xmm1, %xmm0
472 ; AVX-LABEL: insert_test_mul_sd:
474 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
476 %1 = fmul <2 x double> %a, %b
477 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
481 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
482 ; SSE-LABEL: insert_test_div_sd:
484 ; SSE-NEXT: divsd %xmm1, %xmm0
487 ; AVX-LABEL: insert_test_div_sd:
489 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
491 %1 = fdiv <2 x double> %a, %b
492 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
496 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
497 ; SSE-LABEL: insert_test2_add_ss:
499 ; SSE-NEXT: addss %xmm0, %xmm1
500 ; SSE-NEXT: movaps %xmm1, %xmm0
503 ; AVX-LABEL: insert_test2_add_ss:
505 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
507 %1 = fadd <4 x float> %b, %a
508 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
512 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
513 ; SSE-LABEL: insert_test2_sub_ss:
515 ; SSE-NEXT: subss %xmm0, %xmm1
516 ; SSE-NEXT: movaps %xmm1, %xmm0
519 ; AVX-LABEL: insert_test2_sub_ss:
521 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
523 %1 = fsub <4 x float> %b, %a
524 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
528 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
529 ; SSE-LABEL: insert_test2_mul_ss:
531 ; SSE-NEXT: mulss %xmm0, %xmm1
532 ; SSE-NEXT: movaps %xmm1, %xmm0
535 ; AVX-LABEL: insert_test2_mul_ss:
537 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
539 %1 = fmul <4 x float> %b, %a
540 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
544 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
545 ; SSE-LABEL: insert_test2_div_ss:
547 ; SSE-NEXT: divss %xmm0, %xmm1
548 ; SSE-NEXT: movaps %xmm1, %xmm0
551 ; AVX-LABEL: insert_test2_div_ss:
553 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
555 %1 = fdiv <4 x float> %b, %a
556 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
560 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
561 ; SSE-LABEL: insert_test2_add_sd:
563 ; SSE-NEXT: addsd %xmm0, %xmm1
564 ; SSE-NEXT: movaps %xmm1, %xmm0
567 ; AVX-LABEL: insert_test2_add_sd:
569 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
571 %1 = fadd <2 x double> %b, %a
572 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
576 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
577 ; SSE-LABEL: insert_test2_sub_sd:
579 ; SSE-NEXT: subsd %xmm0, %xmm1
580 ; SSE-NEXT: movaps %xmm1, %xmm0
583 ; AVX-LABEL: insert_test2_sub_sd:
585 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
587 %1 = fsub <2 x double> %b, %a
588 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
592 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
593 ; SSE-LABEL: insert_test2_mul_sd:
595 ; SSE-NEXT: mulsd %xmm0, %xmm1
596 ; SSE-NEXT: movaps %xmm1, %xmm0
599 ; AVX-LABEL: insert_test2_mul_sd:
601 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
603 %1 = fmul <2 x double> %b, %a
604 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
608 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
609 ; SSE-LABEL: insert_test2_div_sd:
611 ; SSE-NEXT: divsd %xmm0, %xmm1
612 ; SSE-NEXT: movaps %xmm1, %xmm0
615 ; AVX-LABEL: insert_test2_div_sd:
617 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
619 %1 = fdiv <2 x double> %b, %a
620 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
624 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
625 ; SSE-LABEL: insert_test3_add_ss:
627 ; SSE-NEXT: addss %xmm1, %xmm0
630 ; AVX-LABEL: insert_test3_add_ss:
632 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
634 %1 = fadd <4 x float> %a, %b
635 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
639 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
640 ; SSE-LABEL: insert_test3_sub_ss:
642 ; SSE-NEXT: subss %xmm1, %xmm0
645 ; AVX-LABEL: insert_test3_sub_ss:
647 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
649 %1 = fsub <4 x float> %a, %b
650 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
654 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
655 ; SSE-LABEL: insert_test3_mul_ss:
657 ; SSE-NEXT: mulss %xmm1, %xmm0
660 ; AVX-LABEL: insert_test3_mul_ss:
662 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
664 %1 = fmul <4 x float> %a, %b
665 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
669 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
670 ; SSE-LABEL: insert_test3_div_ss:
672 ; SSE-NEXT: divss %xmm1, %xmm0
675 ; AVX-LABEL: insert_test3_div_ss:
677 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
679 %1 = fdiv <4 x float> %a, %b
680 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
684 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
685 ; SSE-LABEL: insert_test3_add_sd:
687 ; SSE-NEXT: addsd %xmm1, %xmm0
690 ; AVX-LABEL: insert_test3_add_sd:
692 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
694 %1 = fadd <2 x double> %a, %b
695 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
699 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
700 ; SSE-LABEL: insert_test3_sub_sd:
702 ; SSE-NEXT: subsd %xmm1, %xmm0
705 ; AVX-LABEL: insert_test3_sub_sd:
707 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
709 %1 = fsub <2 x double> %a, %b
710 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
714 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
715 ; SSE-LABEL: insert_test3_mul_sd:
717 ; SSE-NEXT: mulsd %xmm1, %xmm0
720 ; AVX-LABEL: insert_test3_mul_sd:
722 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
724 %1 = fmul <2 x double> %a, %b
725 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
729 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
730 ; SSE-LABEL: insert_test3_div_sd:
732 ; SSE-NEXT: divsd %xmm1, %xmm0
735 ; AVX-LABEL: insert_test3_div_sd:
737 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
739 %1 = fdiv <2 x double> %a, %b
740 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
744 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
745 ; SSE-LABEL: insert_test4_add_ss:
747 ; SSE-NEXT: addss %xmm0, %xmm1
748 ; SSE-NEXT: movaps %xmm1, %xmm0
751 ; AVX-LABEL: insert_test4_add_ss:
753 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
755 %1 = fadd <4 x float> %b, %a
756 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
760 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
761 ; SSE-LABEL: insert_test4_sub_ss:
763 ; SSE-NEXT: subss %xmm0, %xmm1
764 ; SSE-NEXT: movaps %xmm1, %xmm0
767 ; AVX-LABEL: insert_test4_sub_ss:
769 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
771 %1 = fsub <4 x float> %b, %a
772 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
776 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
777 ; SSE-LABEL: insert_test4_mul_ss:
779 ; SSE-NEXT: mulss %xmm0, %xmm1
780 ; SSE-NEXT: movaps %xmm1, %xmm0
783 ; AVX-LABEL: insert_test4_mul_ss:
785 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
787 %1 = fmul <4 x float> %b, %a
788 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
792 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
793 ; SSE-LABEL: insert_test4_div_ss:
795 ; SSE-NEXT: divss %xmm0, %xmm1
796 ; SSE-NEXT: movaps %xmm1, %xmm0
799 ; AVX-LABEL: insert_test4_div_ss:
801 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
803 %1 = fdiv <4 x float> %b, %a
804 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
808 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
809 ; SSE-LABEL: insert_test4_add_sd:
811 ; SSE-NEXT: addsd %xmm0, %xmm1
812 ; SSE-NEXT: movaps %xmm1, %xmm0
815 ; AVX-LABEL: insert_test4_add_sd:
817 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
819 %1 = fadd <2 x double> %b, %a
820 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
824 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
825 ; SSE-LABEL: insert_test4_sub_sd:
827 ; SSE-NEXT: subsd %xmm0, %xmm1
828 ; SSE-NEXT: movaps %xmm1, %xmm0
831 ; AVX-LABEL: insert_test4_sub_sd:
833 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
835 %1 = fsub <2 x double> %b, %a
836 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
840 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
841 ; SSE-LABEL: insert_test4_mul_sd:
843 ; SSE-NEXT: mulsd %xmm0, %xmm1
844 ; SSE-NEXT: movaps %xmm1, %xmm0
847 ; AVX-LABEL: insert_test4_mul_sd:
849 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
851 %1 = fmul <2 x double> %b, %a
852 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
856 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
857 ; SSE-LABEL: insert_test4_div_sd:
859 ; SSE-NEXT: divsd %xmm0, %xmm1
860 ; SSE-NEXT: movaps %xmm1, %xmm0
863 ; AVX-LABEL: insert_test4_div_sd:
865 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
867 %1 = fdiv <2 x double> %b, %a
868 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1