1 ; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
2 target datalayout = "E-m:e-i64:64-n32:64"
3 target triple = "powerpc64-unknown-linux-gnu"
; test1: scalar double-precision multiply of %a and %b.
; The expected instruction is xsmuldp with operands 1, 1, 2.
5 define double @test1(double %a, double %b) {
7 %v = fmul double %a, %b
11 ; CHECK: xsmuldp 1, 1, 2
; test2: scalar double-precision divide of %a by %b.
; The expected instruction is xsdivdp with operands 1, 1, 2.
15 define double @test2(double %a, double %b) {
17 %v = fdiv double %a, %b
21 ; CHECK: xsdivdp 1, 1, 2
; test3: scalar double-precision add of %a and %b.
; The expected instruction is xsadddp with operands 1, 1, 2.
25 define double @test3(double %a, double %b) {
27 %v = fadd double %a, %b
31 ; CHECK: xsadddp 1, 1, 2
; test4: element-wise add of two <2 x double> vectors.
; The expected instruction is xvadddp with operands 34, 34, 35.
35 define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
37 %v = fadd <2 x double> %a, %b
41 ; CHECK: xvadddp 34, 34, 35
; test5: bitwise xor of two <4 x i32> vectors.
; The expected instruction is xxlxor with operands 34, 34, 35.
45 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
47 %v = xor <4 x i32> %a, %b
51 ; CHECK: xxlxor 34, 34, 35
; test6: bitwise xor of two <8 x i16> vectors.
; The expected instruction is xxlxor with operands 34, 34, 35.
55 define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
57 %v = xor <8 x i16> %a, %b
61 ; CHECK: xxlxor 34, 34, 35
; test7: bitwise xor of two <16 x i8> vectors.
; The expected instruction is xxlxor with operands 34, 34, 35.
65 define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
67 %v = xor <16 x i8> %a, %b
71 ; CHECK: xxlxor 34, 34, 35
; test8: bitwise or of two <4 x i32> vectors.
; The expected instruction is xxlor with operands 34, 34, 35.
75 define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
77 %v = or <4 x i32> %a, %b
81 ; CHECK: xxlor 34, 34, 35
; test9: bitwise or of two <8 x i16> vectors.
; The expected instruction is xxlor with operands 34, 34, 35.
85 define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
87 %v = or <8 x i16> %a, %b
91 ; CHECK: xxlor 34, 34, 35
; test10: bitwise or of two <16 x i8> vectors.
; The expected instruction is xxlor with operands 34, 34, 35.
95 define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) {
97 %v = or <16 x i8> %a, %b
100 ; CHECK-LABEL: @test10
101 ; CHECK: xxlor 34, 34, 35
; test11: bitwise and of two <4 x i32> vectors.
; The expected instruction is xxland with operands 34, 34, 35.
105 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
107 %v = and <4 x i32> %a, %b
110 ; CHECK-LABEL: @test11
111 ; CHECK: xxland 34, 34, 35
; test12: bitwise and of two <8 x i16> vectors.
; The expected instruction is xxland with operands 34, 34, 35.
115 define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
117 %v = and <8 x i16> %a, %b
120 ; CHECK-LABEL: @test12
121 ; CHECK: xxland 34, 34, 35
; test13: bitwise and of two <16 x i8> vectors.
; The expected instruction is xxland with operands 34, 34, 35.
125 define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) {
127 %v = and <16 x i8> %a, %b
130 ; CHECK-LABEL: @test13
131 ; CHECK: xxland 34, 34, 35
; test14: or of two <4 x i32> vectors followed by xor with all-ones,
; i.e. the bitwise complement of the or (a NOR).
; A single xxlnor with operands 34, 34, 35 is expected.
135 define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
137 %v = or <4 x i32> %a, %b
138 %w = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1>
141 ; CHECK-LABEL: @test14
142 ; CHECK: xxlnor 34, 34, 35
; test15: or of two <8 x i16> vectors followed by xor with all-ones,
; i.e. the bitwise complement of the or (a NOR).
; A single xxlnor with operands 34, 34, 35 is expected.
146 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
148 %v = or <8 x i16> %a, %b
149 %w = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
152 ; CHECK-LABEL: @test15
153 ; CHECK: xxlnor 34, 34, 35
; test16: or of two <16 x i8> vectors followed by xor with all-ones,
; i.e. the bitwise complement of the or (a NOR).
; A single xxlnor with operands 34, 34, 35 is expected.
157 define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
159 %v = or <16 x i8> %a, %b
160 %w = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
163 ; CHECK-LABEL: @test16
164 ; CHECK: xxlnor 34, 34, 35
; test17: %b is complemented (xor with all-ones) and then and-ed with %a,
; on <4 x i32>. A single and-with-complement instruction is expected:
; xxlandc with operands 34, 34, 35.
168 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
170 %w = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
171 %v = and <4 x i32> %a, %w
174 ; CHECK-LABEL: @test17
175 ; CHECK: xxlandc 34, 34, 35
; test18: %b is complemented (xor with all-ones) and then and-ed with %a,
; on <8 x i16>. A single and-with-complement instruction is expected:
; xxlandc with operands 34, 34, 35.
179 define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) {
181 %w = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
182 %v = and <8 x i16> %a, %w
185 ; CHECK-LABEL: @test18
186 ; CHECK: xxlandc 34, 34, 35
; test19: %b is complemented (xor with all-ones) and then and-ed with %a,
; on <16 x i8>. A single and-with-complement instruction is expected:
; xxlandc with operands 34, 34, 35.
190 define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) {
192 %w = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
193 %v = and <16 x i8> %a, %w
196 ; CHECK-LABEL: @test19
197 ; CHECK: xxlandc 34, 34, 35
; test20: per-lane select between %a and %b, driven by an integer equality
; compare of %c and %d on <4 x i32>.
; Expected: a vcmpequw on operands 4, 5 (any destination register), then an
; xxsel writing 34 from sources 35, 34 with some register as the mask.
201 define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
203 %m = icmp eq <4 x i32> %c, %d
204 %v = select <4 x i1> %m, <4 x i32> %a, <4 x i32> %b
207 ; CHECK-LABEL: @test20
208 ; CHECK: vcmpequw {{[0-9]+}}, 4, 5
209 ; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; test21: per-lane select between %a and %b, driven by an ordered-equal
; compare of %c and %d on <4 x float>.
; Expected: xvcmpeqsp on operands 36, 37, whose destination register is
; captured as V1 and must reappear as the mask operand of the xxsel.
213 define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
215 %m = fcmp oeq <4 x float> %c, %d
216 %v = select <4 x i1> %m, <4 x float> %a, <4 x float> %b
219 ; CHECK-LABEL: @test21
220 ; CHECK: xvcmpeqsp [[V1:[0-9]+]], 36, 37
221 ; CHECK: xxsel 34, 35, 34, [[V1]]
; test22: per-lane select between %a and %b, driven by an
; unordered-or-equal compare (ueq) of %c and %d on <4 x float>.
; Three xvcmpeqsp instructions are expected in any relative order: 37 with
; itself, 36 with itself, and 36 with 37. An xxsel into 34 must follow.
; NOTE(review): the two self-compares presumably detect NaN lanes for the
; unordered half of ueq - confirm. How the three results are combined is not
; constrained by the lines visible here.
225 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
227 %m = fcmp ueq <4 x float> %c, %d
228 %v = select <4 x i1> %m, <4 x float> %a, <4 x float> %b
231 ; CHECK-LABEL: @test22
232 ; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
233 ; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
234 ; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
239 ; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; test23: per-lane select between %a and %b, driven by an integer equality
; compare of %c and %d on <8 x i16>.
; Expected: a vcmpequh on operands 4, 5 (any destination register), then an
; xxsel writing 34 from sources 35, 34 with some register as the mask.
243 define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
245 %m = icmp eq <8 x i16> %c, %d
246 %v = select <8 x i1> %m, <8 x i16> %a, <8 x i16> %b
249 ; CHECK-LABEL: @test23
250 ; CHECK: vcmpequh {{[0-9]+}}, 4, 5
251 ; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; test24: per-lane select between %a and %b, driven by an integer equality
; compare of %c and %d on <16 x i8>.
; Expected: a vcmpequb on operands 4, 5 (any destination register), then an
; xxsel writing 34 from sources 35, 34 with some register as the mask.
255 define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
257 %m = icmp eq <16 x i8> %c, %d
258 %v = select <16 x i1> %m, <16 x i8> %a, <16 x i8> %b
261 ; CHECK-LABEL: @test24
262 ; CHECK: vcmpequb {{[0-9]+}}, 4, 5
263 ; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; test25: per-lane select between %a and %b, driven by an ordered-equal
; compare of %c and %d on <2 x double>.
; Expected: xvcmpeqdp on operands 36, 37, whose destination register is
; captured as V1 and must reappear as the mask operand of the xxsel.
267 define <2 x double> @test25(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
269 %m = fcmp oeq <2 x double> %c, %d
270 %v = select <2 x i1> %m, <2 x double> %a, <2 x double> %b
273 ; CHECK-LABEL: @test25
274 ; CHECK: xvcmpeqdp [[V1:[0-9]+]], 36, 37
275 ; CHECK: xxsel 34, 35, 34, [[V1]]
; test26: element-wise integer add of two <2 x i64> vectors.
; Only the function label is matched in the lines visible here; the notes
; below record the intent (two stores, one per operand) and that the
; generated code is known to be poor.
279 define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) {
280 %v = add <2 x i64> %a, %b
283 ; CHECK-LABEL: @test26
285 ; Make sure we use only two stores (one for each operand).
290 ; FIXME: The code quality here is not good; just make sure we do something for now.
; test27: bitwise and of two <2 x i64> vectors.
; The expected instruction is xxland with operands 34, 34, 35.
296 define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
297 %v = and <2 x i64> %a, %b
300 ; CHECK-LABEL: @test27
301 ; CHECK: xxland 34, 34, 35
; test28: 16-byte-aligned load of a <2 x double> from pointer %a.
; The expected instruction is lxvd2x with operands 34, 0, 3.
305 define <2 x double> @test28(<2 x double>* %a) {
306 %v = load <2 x double>* %a, align 16
309 ; CHECK-LABEL: @test28
310 ; CHECK: lxvd2x 34, 0, 3
; test29: 16-byte-aligned store of <2 x double> %b to pointer %a.
; The expected instruction is stxvd2x with operands 34, 0, 3.
314 define void @test29(<2 x double>* %a, <2 x double> %b) {
315 store <2 x double> %b, <2 x double>* %a, align 16
318 ; CHECK-LABEL: @test29
319 ; CHECK: stxvd2x 34, 0, 3
; test28u: under-aligned variant of test28 - the <2 x double> load is only
; 8-byte aligned. The same lxvd2x 34, 0, 3 is still expected.
323 define <2 x double> @test28u(<2 x double>* %a) {
324 %v = load <2 x double>* %a, align 8
327 ; CHECK-LABEL: @test28u
328 ; CHECK: lxvd2x 34, 0, 3
; test29u: under-aligned variant of test29 - the <2 x double> store is only
; 8-byte aligned. The same stxvd2x 34, 0, 3 is still expected.
332 define void @test29u(<2 x double>* %a, <2 x double> %b) {
333 store <2 x double> %b, <2 x double>* %a, align 8
336 ; CHECK-LABEL: @test29u
337 ; CHECK: stxvd2x 34, 0, 3
; test30: 16-byte-aligned load of a <2 x i64> from pointer %a.
; The expected instruction is lxvd2x with operands 34, 0, 3.
341 define <2 x i64> @test30(<2 x i64>* %a) {
342 %v = load <2 x i64>* %a, align 16
345 ; CHECK-LABEL: @test30
346 ; CHECK: lxvd2x 34, 0, 3
; test31: 16-byte-aligned store of <2 x i64> %b to pointer %a.
; The expected instruction is stxvd2x with operands 34, 0, 3.
350 define void @test31(<2 x i64>* %a, <2 x i64> %b) {
351 store <2 x i64> %b, <2 x i64>* %a, align 16
354 ; CHECK-LABEL: @test31
355 ; CHECK: stxvd2x 34, 0, 3
; test32: 16-byte-aligned load of a <4 x float> from pointer %a.
; The expected instruction is lxvw4x with operands 34, 0, 3.
359 define <4 x float> @test32(<4 x float>* %a) {
360 %v = load <4 x float>* %a, align 16
363 ; CHECK-LABEL: @test32
364 ; CHECK: lxvw4x 34, 0, 3
; test33: 16-byte-aligned store of <4 x float> %b to pointer %a.
; The expected instruction is stxvw4x with operands 34, 0, 3.
368 define void @test33(<4 x float>* %a, <4 x float> %b) {
369 store <4 x float> %b, <4 x float>* %a, align 16
372 ; CHECK-LABEL: @test33
373 ; CHECK: stxvw4x 34, 0, 3
; test32u: under-aligned variant of test32 - the <4 x float> load is only
; 8-byte aligned. Only the function label is matched in the lines visible
; here; no instruction pattern for the load itself is shown.
377 define <4 x float> @test32u(<4 x float>* %a) {
378 %v = load <4 x float>* %a, align 8
381 ; CHECK-LABEL: @test32u
; test33u: under-aligned variant of test33 - the <4 x float> store is only
; 8-byte aligned. The same stxvw4x 34, 0, 3 is still expected.
389 define void @test33u(<4 x float>* %a, <4 x float> %b) {
390 store <4 x float> %b, <4 x float>* %a, align 8
393 ; CHECK-LABEL: @test33u
394 ; CHECK: stxvw4x 34, 0, 3
; test34: 16-byte-aligned load of a <4 x i32> from pointer %a.
; The expected instruction is lxvw4x with operands 34, 0, 3.
398 define <4 x i32> @test34(<4 x i32>* %a) {
399 %v = load <4 x i32>* %a, align 16
402 ; CHECK-LABEL: @test34
403 ; CHECK: lxvw4x 34, 0, 3
; test35: 16-byte-aligned store of <4 x i32> %b to pointer %a.
; The expected instruction is stxvw4x with operands 34, 0, 3.
407 define void @test35(<4 x i32>* %a, <4 x i32> %b) {
408 store <4 x i32> %b, <4 x i32>* %a, align 16
411 ; CHECK-LABEL: @test35
412 ; CHECK: stxvw4x 34, 0, 3
; test40: unsigned <2 x i64> to <2 x double> conversion.
; The expected instruction is xvcvuxddp with operands 34, 34.
416 define <2 x double> @test40(<2 x i64> %a) {
417 %v = uitofp <2 x i64> %a to <2 x double>
420 ; CHECK-LABEL: @test40
421 ; CHECK: xvcvuxddp 34, 34
; test41: signed <2 x i64> to <2 x double> conversion.
; The expected instruction is xvcvsxddp with operands 34, 34.
425 define <2 x double> @test41(<2 x i64> %a) {
426 %v = sitofp <2 x i64> %a to <2 x double>
429 ; CHECK-LABEL: @test41
430 ; CHECK: xvcvsxddp 34, 34
; test42: <2 x double> to unsigned <2 x i64> conversion.
; The expected instruction is xvcvdpuxds with operands 34, 34.
434 define <2 x i64> @test42(<2 x double> %a) {
435 %v = fptoui <2 x double> %a to <2 x i64>
438 ; CHECK-LABEL: @test42
439 ; CHECK: xvcvdpuxds 34, 34
; test43: <2 x double> to signed <2 x i64> conversion.
; The expected instruction is xvcvdpsxds with operands 34, 34.
443 define <2 x i64> @test43(<2 x double> %a) {
444 %v = fptosi <2 x double> %a to <2 x i64>
447 ; CHECK-LABEL: @test43
448 ; CHECK: xvcvdpsxds 34, 34
; test44: unsigned <2 x i64> to <2 x float> conversion.
; Only the function label is matched in the lines visible here; the note
; below records that the generated code is known to be poor.
452 define <2 x float> @test44(<2 x i64> %a) {
453 %v = uitofp <2 x i64> %a to <2 x float>
456 ; CHECK-LABEL: @test44
457 ; FIXME: The code quality here looks pretty bad.
; test45: signed <2 x i64> to <2 x float> conversion.
; Only the function label is matched in the lines visible here; the note
; below records that the generated code is known to be poor.
461 define <2 x float> @test45(<2 x i64> %a) {
462 %v = sitofp <2 x i64> %a to <2 x float>
465 ; CHECK-LABEL: @test45
466 ; FIXME: The code quality here looks pretty bad.
; test46: <2 x float> to unsigned <2 x i64> conversion.
; Only the function label is matched in the lines visible here; the note
; below records that the generated code is known to be poor.
470 define <2 x i64> @test46(<2 x float> %a) {
471 %v = fptoui <2 x float> %a to <2 x i64>
474 ; CHECK-LABEL: @test46
475 ; FIXME: The code quality here looks pretty bad.
; test47: <2 x float> to signed <2 x i64> conversion.
; Only the function label is matched in the lines visible here; the note
; below records that the generated code is known to be poor.
479 define <2 x i64> @test47(<2 x float> %a) {
480 %v = fptosi <2 x float> %a to <2 x i64>
483 ; CHECK-LABEL: @test47
484 ; FIXME: The code quality here looks pretty bad.
; test50: loads one double from %a and inserts that same value into both
; lanes of a <2 x double> (a splat of the loaded scalar).
; A single load-and-splat instruction is expected: lxvdsx 34, 0, 3.
488 define <2 x double> @test50(double* %a) {
489 %v = load double* %a, align 8
490 %w = insertelement <2 x double> undef, double %v, i32 0
491 %x = insertelement <2 x double> %w, double %v, i32 1
494 ; CHECK-LABEL: @test50
495 ; CHECK: lxvdsx 34, 0, 3
; test51: shuffle with mask <0, 0> - both result lanes take element 0 of %a.
; The expected instruction is xxpermdi 34, 34, 34, 0.
499 define <2 x double> @test51(<2 x double> %a, <2 x double> %b) {
500 %v = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
503 ; CHECK-LABEL: @test51
504 ; CHECK: xxpermdi 34, 34, 34, 0
; test52: shuffle with mask <0, 2> - element 0 of %a, then element 0 of %b.
; The expected instruction is xxpermdi 34, 34, 35, 0.
508 define <2 x double> @test52(<2 x double> %a, <2 x double> %b) {
509 %v = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
512 ; CHECK-LABEL: @test52
513 ; CHECK: xxpermdi 34, 34, 35, 0
; test53: shuffle with mask <2, 0> - element 0 of %b, then element 0 of %a.
; The expected instruction is xxpermdi 34, 35, 34, 0 (sources swapped
; relative to the <0, 2> mask case).
517 define <2 x double> @test53(<2 x double> %a, <2 x double> %b) {
518 %v = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 0>
521 ; CHECK-LABEL: @test53
522 ; CHECK: xxpermdi 34, 35, 34, 0
; test54: shuffle with mask <1, 2> - element 1 of %a, then element 0 of %b.
; The expected instruction is xxpermdi 34, 34, 35, 2.
526 define <2 x double> @test54(<2 x double> %a, <2 x double> %b) {
527 %v = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
530 ; CHECK-LABEL: @test54
531 ; CHECK: xxpermdi 34, 34, 35, 2
; test55: shuffle with mask <1, 3> - element 1 of %a, then element 1 of %b.
; The expected instruction is xxpermdi 34, 34, 35, 3.
535 define <2 x double> @test55(<2 x double> %a, <2 x double> %b) {
536 %v = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
539 ; CHECK-LABEL: @test55
540 ; CHECK: xxpermdi 34, 34, 35, 3
; test56: <2 x i64> shuffle with mask <1, 3> - element 1 of %a, then
; element 1 of %b. The expected instruction is xxpermdi 34, 34, 35, 3.
544 define <2 x i64> @test56(<2 x i64> %a, <2 x i64> %b) {
545 %v = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
548 ; CHECK-LABEL: @test56
549 ; CHECK: xxpermdi 34, 34, 35, 3
; test60: per-lane left shift of <2 x i64> %a by the amounts in %b.
; Only the function label is matched in the lines visible here; the note
; below records that this operation is expected to be scalarized.
553 define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) {
554 %v = shl <2 x i64> %a, %b
557 ; CHECK-LABEL: @test60
558 ; This should scalarize, and the current code quality is not good.
; test61: per-lane logical right shift of <2 x i64> %a by the amounts in %b.
; Only the function label is matched in the lines visible here; the note
; below records that this operation is expected to be scalarized.
567 define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) {
568 %v = lshr <2 x i64> %a, %b
571 ; CHECK-LABEL: @test61
572 ; This should scalarize, and the current code quality is not good.
; test62: per-lane arithmetic right shift of <2 x i64> %a by the amounts
; in %b. Only the function label is matched in the lines visible here; the
; note below records that this operation is expected to be scalarized.
581 define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) {
582 %v = ashr <2 x i64> %a, %b
585 ; CHECK-LABEL: @test62
586 ; This should scalarize, and the current code quality is not good.
; test63: extracts element 0 of a <2 x double>.
; The expected instruction is xxlor 1, 34, 34 - an or of register 34 with
; itself, written to register 1.
595 define double @test63(<2 x double> %a) {
596 %v = extractelement <2 x double> %a, i32 0
599 ; CHECK-LABEL: @test63
600 ; CHECK: xxlor 1, 34, 34
; test64: extracts element 1 of a <2 x double>.
; The expected instruction is xxpermdi 1, 34, 34, 2 - a permute of register
; 34 with itself, written to register 1.
604 define double @test64(<2 x double> %a) {
605 %v = extractelement <2 x double> %a, i32 1
608 ; CHECK-LABEL: @test64
609 ; CHECK: xxpermdi 1, 34, 34, 2
; test65: per-lane equality compare of two <2 x i64> vectors.
; The expected instruction is the word-sized compare vcmpequw 2, 2, 3.
; NOTE(review): operands 2 and 3 are presumably the same argument registers
; that other tests in this file name as 34 and 35 - confirm.
613 define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) {
614 %w = icmp eq <2 x i64> %a, %b
617 ; CHECK-LABEL: @test65
618 ; CHECK: vcmpequw 2, 2, 3
; test66: per-lane inequality compare of two <2 x i64> vectors.
; Expected: an equality compare (vcmpequw on operands 2, 3, any destination
; register) followed by an xxlnor into 34, i.e. the equality result is
; produced first and a NOR then yields the not-equal mask.
622 define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
623 %w = icmp ne <2 x i64> %a, %b
626 ; CHECK-LABEL: @test66
627 ; CHECK: vcmpequw {{[0-9]+}}, 2, 3
628 ; CHECK: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
; test67: per-lane unsigned less-than compare of two <2 x i64> vectors.
; Only the function label is matched in the lines visible here; the note
; below records that this operation is expected to be scalarized.
632 define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
633 %w = icmp ult <2 x i64> %a, %b
636 ; CHECK-LABEL: @test67
637 ; This should scalarize, and the current code quality is not good.
; test68: signed <2 x i32> to <2 x double> conversion.
; Expected: xxsldwi of register 34 with itself by 1, with the destination
; captured as V1, then xvcvsxwdp converting V1 into register 34.
646 define <2 x double> @test68(<2 x i32> %a) {
647 %w = sitofp <2 x i32> %a to <2 x double>
650 ; CHECK-LABEL: @test68
651 ; CHECK: xxsldwi [[V1:[0-9]+]], 34, 34, 1
652 ; CHECK: xvcvsxwdp 34, [[V1]]
; test69: signed <2 x i16> to <2 x double> conversion.
; Expected sequence: splat the immediate 8 (V1), add V1 to itself (V2), shift
; register 2 left by V2 (V3), arithmetic-shift V3 right by V2, then the same
; xxsldwi ... 1 / xvcvsxwdp 34 pair used for the <2 x i32> conversion.
; NOTE(review): the shift-left / arithmetic-shift-right pair by 8 + 8 = 16
; presumably sign-extends the 16-bit lanes to 32 bits - confirm.
656 define <2 x double> @test69(<2 x i16> %a) {
657 %w = sitofp <2 x i16> %a to <2 x double>
660 ; CHECK-LABEL: @test69
661 ; CHECK: vspltisw [[V1:[0-9]+]], 8
662 ; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
663 ; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
664 ; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
665 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
666 ; CHECK: xvcvsxwdp 34, [[V4]]
; test70: signed <2 x i8> to <2 x double> conversion.
; Expected sequence: splat the immediate 12 (V1), add V1 to itself (V2),
; shift register 2 left by V2 (V3), arithmetic-shift V3 right by V2, then an
; xxsldwi ... 1 whose result (V4) feeds xvcvsxwdp into register 34.
; NOTE(review): the shift-left / arithmetic-shift-right pair by 12 + 12 = 24
; presumably sign-extends the 8-bit lanes to 32 bits - confirm.
670 define <2 x double> @test70(<2 x i8> %a) {
671 %w = sitofp <2 x i8> %a to <2 x double>
674 ; CHECK-LABEL: @test70
675 ; CHECK: vspltisw [[V1:[0-9]+]], 12
676 ; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
677 ; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
678 ; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
679 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
680 ; CHECK: xvcvsxwdp 34, [[V4]]
; test80: splats the scalar %v into both lanes of a <2 x i32> (insert into
; lane 0, then shuffle with an all-zero mask) and adds the constant <2, 3>.
; Expected: the two sums are formed with scalar addi on register 3 (+3 and
; +2, in any order, alongside an addi computing register 1 minus 16), stored
; with std to -8(1) and -16(1), and the vector is then loaded with lxvd2x
; from the computed address.
; NOTE(review): register 1 is presumably the stack pointer, making this a
; store-to-stack / vector-reload sequence - confirm.
684 define <2 x i32> @test80(i32 %v) {
685 %b1 = insertelement <2 x i32> undef, i32 %v, i32 0
686 %b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer
687 %i = add <2 x i32> %b2, <i32 2, i32 3>
690 ; CHECK-LABEL: @test80
691 ; CHECK-DAG: addi [[R1:[0-9]+]], 3, 3
692 ; CHECK-DAG: addi [[R2:[0-9]+]], 1, -16
693 ; CHECK-DAG: addi [[R3:[0-9]+]], 3, 2
694 ; CHECK: std [[R1]], -8(1)
695 ; CHECK: std [[R3]], -16(1)
696 ; CHECK: lxvd2x 34, 0, [[R2]]
; test81: reinterprets a <4 x float> as a <2 x double> via bitcast.
; Only the function label is matched in the lines visible here.
701 define <2 x double> @test81(<4 x float> %b) {
702 %w = bitcast <4 x float> %b to <2 x double>
705 ; CHECK-LABEL: @test81
; test82: scalar select between %a and %b, driven by an ordered-equal
; compare of the doubles %c and %d.
; Expected: xscmpudp on operands 3, 4, whose destination is captured as REG,
; followed by a beqlr that uses that same REG.
709 define double @test82(double %a, double %b, double %c, double %d) {
711 %m = fcmp oeq double %c, %d
712 %v = select i1 %m, double %a, double %b
715 ; CHECK-LABEL: @test82
716 ; CHECK: xscmpudp [[REG:[0-9]+]], 3, 4
717 ; CHECK: beqlr [[REG]]