1 ; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsldoi | wc -l | grep 2
2 ; RUN: llvm-as < %s | opt -instcombine | llc -march=ppc32 -mcpu=g5 | not grep vperm
4 void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
6 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
7 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
8 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
9 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
10 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
11 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
12 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
13 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
14 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
15 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
16 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
17 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
18 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
19 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
20 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
21 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
22 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
23 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
24 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
25 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
26 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
27 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
28 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
29 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
30 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
31 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
32 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
33 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
34 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
35 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
36 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
37 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
38 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
39 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
40 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
41 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
42 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
43 store <8 x short> %tmp33, <8 x short>* %A
47 void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) {
48 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
49 %tmp2 = load <8 x short>* %A ; <<8 x short>> [#uses=1]
50 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
51 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
52 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
53 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
54 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
55 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
56 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
57 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
58 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
59 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
60 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
61 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
62 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
63 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
64 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
65 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
66 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
67 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
68 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
69 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
70 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
71 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
72 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
73 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
74 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
75 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
76 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
77 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
78 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
79 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
80 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
81 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
82 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
83 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
84 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
85 store <8 x short> %tmp33, <8 x short>* %A
89 void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) {
91 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
92 %tmp = cast <8 x short> %tmp to <4 x int> ; <<4 x int>> [#uses=1]
93 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
94 %tmp2 = cast <8 x short> %tmp2 to <4 x int> ; <<4 x int>> [#uses=1]
95 %tmp3 = call <4 x int> %llvm.ppc.altivec.vperm( <4 x int> %tmp, <4 x int> %tmp2, <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14 > ) ; <<4 x int>> [#uses=1]
96 %tmp3 = cast <4 x int> %tmp3 to <8 x short> ; <<8 x short>> [#uses=1]
97 store <8 x short> %tmp3, <8 x short>* %A
101 declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>)