1 ; RUN: llvm-upgrade < %s | llvm-as | opt -instcombine | llc -march=ppc32 -mcpu=g5 | not grep vperm &&
2 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vsldoi | wc -l | grep 2 &&
3 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vmrgh | wc -l | grep 7 &&
4 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vmrgl | wc -l | grep 6 &&
5 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vpkuhum | wc -l | grep 1 &&
6 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vpkuwum | wc -l | grep 1
; VSLDOI_xy: two-input byte shift.
; Loads the <8 x short> vectors at %A and %B and views each as <16 x sbyte>.
; The result takes bytes 5..15 of the %A vector (result bytes 0..10) followed
; by bytes 0..4 of the %B vector (result bytes 11..15), i.e. bytes 5..20 of
; the concatenation A:B. The result is cast back to <8 x short> and stored
; to %A.
; NOTE(review): going by the function name and the RUN line that counts
; vsldoi, this is presumably expected to select a single vsldoi - confirm.
8 void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
10 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
11 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
12 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
13 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
14 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
15 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
16 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
17 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
18 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
19 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
20 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
21 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
22 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
23 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
24 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
25 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
26 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
27 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
28 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
29 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
30 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
31 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
32 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
33 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
34 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
35 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
36 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
37 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
38 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
39 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
40 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
41 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
42 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
43 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
44 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
45 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
46 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
47 store <8 x short> %tmp33, <8 x short>* %A
; VSLDOI_xx: single-input variant of the byte shift.
; Both loads read %A (the %B argument is not used), so the result is bytes
; 5..15 of the %A vector followed by bytes 0..4 of the same vector. The
; result is cast back to <8 x short> and stored to %A.
; NOTE(review): going by the function name and the RUN line that counts
; vsldoi, this is presumably expected to select a vsldoi with both source
; operands equal - confirm.
51 void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) {
52 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
53 %tmp2 = load <8 x short>* %A ; <<8 x short>> [#uses=1]
54 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
55 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
56 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
57 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
58 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
59 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
60 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
61 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
62 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
63 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
64 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
65 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
66 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
67 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
68 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
69 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
70 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
71 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
72 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
73 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
74 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
75 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
76 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
77 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
78 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
79 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
80 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
81 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
82 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
83 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
84 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
85 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
86 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
87 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
88 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
89 store <8 x short> %tmp33, <8 x short>* %A
; VPERM_promote: explicit call to the AltiVec vperm intrinsic with a
; constant control vector in which every byte is 14.
; The <8 x short> vectors at %A and %B are cast to <4 x int>, passed to the
; intrinsic, and the result is cast back to <8 x short> and stored to %A.
; The first RUN line pipes the file through opt -instcombine before llc and
; requires that no "vperm" appears in the generated code.
93 void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) {
95 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
96 %tmp = cast <8 x short> %tmp to <4 x int> ; <<4 x int>> [#uses=1]
97 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
98 %tmp2 = cast <8 x short> %tmp2 to <4 x int> ; <<4 x int>> [#uses=1]
99 %tmp3 = call <4 x int> %llvm.ppc.altivec.vperm( <4 x int> %tmp, <4 x int> %tmp2, <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14 > ) ; <<4 x int>> [#uses=1]
100 %tmp3 = cast <4 x int> %tmp3 to <8 x short> ; <<8 x short>> [#uses=1]
101 store <8 x short> %tmp3, <8 x short>* %A
; External declaration of the AltiVec vperm intrinsic called by
; %VPERM_promote: two <4 x int> operands plus a <16 x sbyte> control vector,
; returning <4 x int>.
105 declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>)
; tb_l: byte interleave of elements 8..15 of two vectors.
; Result = A8, B8, A9, B9, ..., A15, B15, where A and B are the
; <16 x sbyte> vectors loaded from %A and %B. The result is stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line (byte-width merge) - confirm.
108 void %tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
110 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
111 %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
112 %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
113 %tmp3 = extractelement <16 x sbyte> %tmp2, uint 8 ; <sbyte> [#uses=1]
114 %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
115 %tmp5 = extractelement <16 x sbyte> %tmp2, uint 9 ; <sbyte> [#uses=1]
116 %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
117 %tmp7 = extractelement <16 x sbyte> %tmp2, uint 10 ; <sbyte> [#uses=1]
118 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
119 %tmp9 = extractelement <16 x sbyte> %tmp2, uint 11 ; <sbyte> [#uses=1]
120 %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
121 %tmp11 = extractelement <16 x sbyte> %tmp2, uint 12 ; <sbyte> [#uses=1]
122 %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
123 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 13 ; <sbyte> [#uses=1]
124 %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
125 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 14 ; <sbyte> [#uses=1]
126 %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
127 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 15 ; <sbyte> [#uses=1]
128 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
129 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
130 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
131 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
132 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
133 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
134 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
135 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
136 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
137 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
138 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
139 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
140 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
141 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
142 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
143 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
144 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
; th_l: halfword interleave of elements 4..7 of two vectors.
; Result = A4, B4, A5, B5, A6, B6, A7, B7, where A and B are the
; <8 x short> vectors loaded from %A and %B. The result is stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line (halfword-width merge) - confirm.
148 void %th_l(<8 x short>* %A, <8 x short>* %B) {
150 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
151 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
152 %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
153 %tmp3 = extractelement <8 x short> %tmp2, uint 4 ; <short> [#uses=1]
154 %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
155 %tmp5 = extractelement <8 x short> %tmp2, uint 5 ; <short> [#uses=1]
156 %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
157 %tmp7 = extractelement <8 x short> %tmp2, uint 6 ; <short> [#uses=1]
158 %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
159 %tmp9 = extractelement <8 x short> %tmp2, uint 7 ; <short> [#uses=1]
160 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
161 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
162 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
163 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
164 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
165 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
166 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
167 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
168 store <8 x short> %tmp17, <8 x short>* %A
; tw_l: word interleave of elements 2..3 of two vectors.
; Result = A2, B2, A3, B3, where A and B are the <4 x int> vectors loaded
; from %A and %B. The result is stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line (word-width merge) - confirm.
172 void %tw_l(<4 x int>* %A, <4 x int>* %B) {
174 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
175 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
176 %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
177 %tmp3 = extractelement <4 x int> %tmp2, uint 2 ; <int> [#uses=1]
178 %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
179 %tmp5 = extractelement <4 x int> %tmp2, uint 3 ; <int> [#uses=1]
180 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
181 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
182 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
183 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
184 store <4 x int> %tmp9, <4 x int>* %A
; tb_h: byte interleave of elements 0..7 of two vectors.
; Result = A0, B0, A1, B1, ..., A7, B7, where A and B are the
; <16 x sbyte> vectors loaded from %A and %B. The result is stored to %A.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line (byte-width merge) - confirm.
188 void %tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
190 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
191 %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
192 %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
193 %tmp3 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
194 %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
195 %tmp5 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
196 %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
197 %tmp7 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
198 %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
199 %tmp9 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
200 %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
201 %tmp11 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
202 %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
203 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 5 ; <sbyte> [#uses=1]
204 %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
205 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 6 ; <sbyte> [#uses=1]
206 %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
207 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 7 ; <sbyte> [#uses=1]
208 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
209 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
210 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
211 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
212 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
213 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
214 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
215 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
216 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
217 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
218 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
219 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
220 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
221 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
222 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
223 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
224 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
; th_h: halfword interleave of elements 0..3 of two vectors.
; Result = A0, B0, A1, B1, A2, B2, A3, B3, where A and B are the
; <8 x short> vectors loaded from %A and %B. The result is stored to %A.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line (halfword-width merge) - confirm.
228 void %th_h(<8 x short>* %A, <8 x short>* %B) {
230 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
231 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
232 %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
233 %tmp3 = extractelement <8 x short> %tmp2, uint 0 ; <short> [#uses=1]
234 %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
235 %tmp5 = extractelement <8 x short> %tmp2, uint 1 ; <short> [#uses=1]
236 %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
237 %tmp7 = extractelement <8 x short> %tmp2, uint 2 ; <short> [#uses=1]
238 %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
239 %tmp9 = extractelement <8 x short> %tmp2, uint 3 ; <short> [#uses=1]
240 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
241 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
242 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
243 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
244 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
245 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
246 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
247 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
248 store <8 x short> %tmp17, <8 x short>* %A
; tw_h: word interleave of elements 0..1 of two vectors, taking the %B
; vector first.
; Result = B0, A0, B1, A1, where A and B are the <4 x int> vectors loaded
; from %A and %B (the even-numbered extracts read %tmp2, i.e. B). The result
; is stored to %A. Compare %tw_h_flop, which uses the opposite operand order.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line (word-width merge) - confirm.
252 void %tw_h(<4 x int>* %A, <4 x int>* %B) {
254 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
255 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
256 %tmp = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
257 %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
258 %tmp4 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
259 %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
260 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
261 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
262 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
263 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
264 store <4 x int> %tmp9, <4 x int>* %A
; tw_h_flop: word interleave of elements 0..1 of two vectors, taking the %A
; vector first.
; Result = A0, B0, A1, B1, where A and B are the <4 x int> vectors loaded
; from %A and %B. The result is stored to %A. This is the same merge as
; %tw_h with the two source operands swapped.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line (word-width merge) - confirm.
268 void %tw_h_flop(<4 x int>* %A, <4 x int>* %B) {
269 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
270 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
271 %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
272 %tmp3 = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
273 %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
274 %tmp5 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
275 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
276 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
277 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
278 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
279 store <4 x int> %tmp9, <4 x int>* %A
; VMRG_UNARY_tb_l: single-input byte merge of elements 8..15.
; Only %A is loaded (%B is not used). Each of bytes 8..15 of the vector is
; extracted twice, giving result = A8, A8, A9, A9, ..., A15, A15, which is
; stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line, with both merge operands equal - confirm.
284 void %VMRG_UNARY_tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
286 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
287 %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
288 %tmp3 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
289 %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
290 %tmp5 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
291 %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
292 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
293 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
294 %tmp9 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
295 %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
296 %tmp11 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
297 %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
298 %tmp13 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
299 %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
300 %tmp15 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
301 %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
302 %tmp17 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
303 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
304 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
305 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
306 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
307 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
308 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
309 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
310 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
311 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
312 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
313 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
314 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
315 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
316 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
317 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
318 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
319 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
; VMRG_UNARY_th_l: single-input halfword merge of elements 4..7.
; Only %A is loaded (%B is not used). Each of shorts 4..7 of the vector is
; extracted twice, giving result = A4, A4, A5, A5, A6, A6, A7, A7, which is
; stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line, with both merge operands equal - confirm.
323 void %VMRG_UNARY_th_l(<8 x short>* %A, <8 x short>* %B) {
325 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
326 %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
327 %tmp3 = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
328 %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
329 %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
330 %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
331 %tmp7 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
332 %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
333 %tmp9 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
334 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
335 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
336 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
337 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
338 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
339 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
340 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
341 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
342 store <8 x short> %tmp17, <8 x short>* %A
; VMRG_UNARY_tw_l: single-input word merge of elements 2..3.
; Only %A is loaded (%B is not used). Result = A2, A2, A3, A3, which is
; stored to %A.
; NOTE(review): presumably one of the six patterns counted by the
; "grep vmrgl" RUN line, with both merge operands equal - confirm.
346 void %VMRG_UNARY_tw_l(<4 x int>* %A, <4 x int>* %B) {
348 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
349 %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
350 %tmp3 = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
351 %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
352 %tmp5 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
353 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
354 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
355 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
356 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
357 store <4 x int> %tmp9, <4 x int>* %A
; VMRG_UNARY_tb_h: single-input byte merge of elements 0..7.
; Only %A is loaded (%B is not used). Each of bytes 0..7 of the vector is
; extracted twice, giving result = A0, A0, A1, A1, ..., A7, A7, which is
; stored to %A.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line, with both merge operands equal - confirm.
361 void %VMRG_UNARY_tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
363 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
364 %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
365 %tmp3 = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
366 %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
367 %tmp5 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
368 %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
369 %tmp7 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
370 %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
371 %tmp9 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
372 %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
373 %tmp11 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
374 %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
375 %tmp13 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
376 %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
377 %tmp15 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
378 %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
379 %tmp17 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
380 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
381 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
382 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
383 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
384 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
385 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
386 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
387 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
388 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
389 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
390 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
391 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
392 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
393 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
394 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
395 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
396 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
; VMRG_UNARY_th_h: single-input halfword merge of elements 0..3.
; Only %A is loaded (%B is not used). Each of shorts 0..3 of the vector is
; extracted twice, giving result = A0, A0, A1, A1, A2, A2, A3, A3, which is
; stored to %A.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line, with both merge operands equal - confirm.
400 void %VMRG_UNARY_th_h(<8 x short>* %A, <8 x short>* %B) {
402 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
403 %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
404 %tmp3 = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
405 %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
406 %tmp5 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
407 %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
408 %tmp7 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
409 %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
410 %tmp9 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
411 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
412 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
413 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
414 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
415 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
416 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
417 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
418 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
419 store <8 x short> %tmp17, <8 x short>* %A
; VMRG_UNARY_tw_h: single-input word merge of elements 0..1.
; Only %A is loaded (%B is not used). Result = A0, A0, A1, A1, which is
; stored to %A.
; NOTE(review): presumably one of the seven patterns counted by the
; "grep vmrgh" RUN line, with both merge operands equal - confirm.
423 void %VMRG_UNARY_tw_h(<4 x int>* %A, <4 x int>* %B) {
425 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
426 %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
427 %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
428 %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
429 %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
430 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
431 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
432 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
433 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
434 store <4 x int> %tmp9, <4 x int>* %A
; VPCKUHUM_unary: single-input pack of the odd-numbered bytes.
; The <8 x short> vector at %A is cast to <16 x sbyte> twice (%tmp and
; %tmp3); %B is not used. Bytes 1, 3, 5, ..., 15 of the first cast fill
; result bytes 0..7 and the same odd bytes of the second cast fill result
; bytes 8..15. The result is cast back to <8 x short> and stored to %A.
; NOTE(review): presumably the single pattern counted by the "grep vpkuhum"
; RUN line, with both pack operands equal - confirm.
438 void %VPCKUHUM_unary(<8 x short>* %A, <8 x short>* %B) {
440 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=2]
441 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
442 %tmp3 = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
443 %tmp = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
444 %tmp4 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
445 %tmp5 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
446 %tmp6 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
447 %tmp7 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
448 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
449 %tmp9 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
450 %tmp10 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
451 %tmp11 = extractelement <16 x sbyte> %tmp3, uint 1 ; <sbyte> [#uses=1]
452 %tmp12 = extractelement <16 x sbyte> %tmp3, uint 3 ; <sbyte> [#uses=1]
453 %tmp13 = extractelement <16 x sbyte> %tmp3, uint 5 ; <sbyte> [#uses=1]
454 %tmp14 = extractelement <16 x sbyte> %tmp3, uint 7 ; <sbyte> [#uses=1]
455 %tmp15 = extractelement <16 x sbyte> %tmp3, uint 9 ; <sbyte> [#uses=1]
456 %tmp16 = extractelement <16 x sbyte> %tmp3, uint 11 ; <sbyte> [#uses=1]
457 %tmp17 = extractelement <16 x sbyte> %tmp3, uint 13 ; <sbyte> [#uses=1]
458 %tmp18 = extractelement <16 x sbyte> %tmp3, uint 15 ; <sbyte> [#uses=1]
459 %tmp19 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
460 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 1 ; <<16 x sbyte>> [#uses=1]
461 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 2 ; <<16 x sbyte>> [#uses=1]
462 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 3 ; <<16 x sbyte>> [#uses=1]
463 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 4 ; <<16 x sbyte>> [#uses=1]
464 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 5 ; <<16 x sbyte>> [#uses=1]
465 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 6 ; <<16 x sbyte>> [#uses=1]
466 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 7 ; <<16 x sbyte>> [#uses=1]
467 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 8 ; <<16 x sbyte>> [#uses=1]
468 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 9 ; <<16 x sbyte>> [#uses=1]
469 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 10 ; <<16 x sbyte>> [#uses=1]
470 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 11 ; <<16 x sbyte>> [#uses=1]
471 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 12 ; <<16 x sbyte>> [#uses=1]
472 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 13 ; <<16 x sbyte>> [#uses=1]
473 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 14 ; <<16 x sbyte>> [#uses=1]
474 %tmp34 = insertelement <16 x sbyte> %tmp33, sbyte %tmp18, uint 15 ; <<16 x sbyte>> [#uses=1]
475 %tmp34 = cast <16 x sbyte> %tmp34 to <8 x short> ; <<8 x short>> [#uses=1]
476 store <8 x short> %tmp34, <8 x short>* %A
; VPCKUWUM_unary: single-input pack of the odd-numbered halfwords.
; The <4 x int> vector at %A is cast to <8 x short> twice (%tmp and %tmp3);
; %B is not used. Shorts 1, 3, 5, 7 of the first cast fill result elements
; 0..3 and the same odd shorts of the second cast fill result elements 4..7.
; The result is cast back to <4 x int> and stored to %A.
; NOTE(review): presumably the single pattern counted by the "grep vpkuwum"
; RUN line, with both pack operands equal - confirm.
480 void %VPCKUWUM_unary(<4 x int>* %A, <4 x int>* %B) {
482 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
483 %tmp = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
484 %tmp3 = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
485 %tmp = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
486 %tmp4 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
487 %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
488 %tmp6 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
489 %tmp7 = extractelement <8 x short> %tmp3, uint 1 ; <short> [#uses=1]
490 %tmp8 = extractelement <8 x short> %tmp3, uint 3 ; <short> [#uses=1]
491 %tmp9 = extractelement <8 x short> %tmp3, uint 5 ; <short> [#uses=1]
492 %tmp10 = extractelement <8 x short> %tmp3, uint 7 ; <short> [#uses=1]
493 %tmp11 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
494 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 1 ; <<8 x short>> [#uses=1]
495 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 2 ; <<8 x short>> [#uses=1]
496 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 3 ; <<8 x short>> [#uses=1]
497 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 4 ; <<8 x short>> [#uses=1]
498 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 5 ; <<8 x short>> [#uses=1]
499 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 6 ; <<8 x short>> [#uses=1]
500 %tmp18 = insertelement <8 x short> %tmp17, short %tmp10, uint 7 ; <<8 x short>> [#uses=1]
501 %tmp18 = cast <8 x short> %tmp18 to <4 x int> ; <<4 x int>> [#uses=1]
502 store <4 x int> %tmp18, <4 x int>* %A