1 ; RUN: llvm-upgrade < %s | llvm-as | opt -instcombine | \
2 ; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
3 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 > %t
4 ; RUN: grep vsldoi %t | count 2
5 ; RUN: grep vmrgh %t | count 7
6 ; RUN: grep vmrgl %t | count 6
7 ; RUN: grep vpkuhum %t | count 1
8 ; RUN: grep vpkuwum %t | count 1
10 void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
12 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
13 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
14 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
15 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
16 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
17 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
18 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
19 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
20 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
21 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
22 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
23 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
24 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
25 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
26 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
27 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
28 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
29 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
30 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
31 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
32 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
33 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
34 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
35 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
36 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
37 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
38 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
39 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
40 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
41 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
42 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
43 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
44 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
45 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
46 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
47 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
48 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
49 store <8 x short> %tmp33, <8 x short>* %A
53 void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) {
54 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
55 %tmp2 = load <8 x short>* %A ; <<8 x short>> [#uses=1]
56 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
57 %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
58 %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
59 %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
60 %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
61 %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
62 %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
63 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
64 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
65 %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
66 %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
67 %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
68 %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
69 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
70 %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
71 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
72 %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
73 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
74 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
75 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
76 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
77 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
78 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
79 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
80 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
81 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
82 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
83 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
84 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
85 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
86 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
87 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
88 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
89 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
90 %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
91 store <8 x short> %tmp33, <8 x short>* %A
95 void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) {
97 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
98 %tmp = cast <8 x short> %tmp to <4 x int> ; <<4 x int>> [#uses=1]
99 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
100 %tmp2 = cast <8 x short> %tmp2 to <4 x int> ; <<4 x int>> [#uses=1]
101 %tmp3 = call <4 x int> %llvm.ppc.altivec.vperm( <4 x int> %tmp, <4 x int> %tmp2, <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14 > ) ; <<4 x int>> [#uses=1]
102 %tmp3 = cast <4 x int> %tmp3 to <8 x short> ; <<8 x short>> [#uses=1]
103 store <8 x short> %tmp3, <8 x short>* %A
107 declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>)
110 void %tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
112 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
113 %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
114 %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
115 %tmp3 = extractelement <16 x sbyte> %tmp2, uint 8 ; <sbyte> [#uses=1]
116 %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
117 %tmp5 = extractelement <16 x sbyte> %tmp2, uint 9 ; <sbyte> [#uses=1]
118 %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
119 %tmp7 = extractelement <16 x sbyte> %tmp2, uint 10 ; <sbyte> [#uses=1]
120 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
121 %tmp9 = extractelement <16 x sbyte> %tmp2, uint 11 ; <sbyte> [#uses=1]
122 %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
123 %tmp11 = extractelement <16 x sbyte> %tmp2, uint 12 ; <sbyte> [#uses=1]
124 %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
125 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 13 ; <sbyte> [#uses=1]
126 %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
127 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 14 ; <sbyte> [#uses=1]
128 %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
129 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 15 ; <sbyte> [#uses=1]
130 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
131 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
132 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
133 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
134 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
135 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
136 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
137 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
138 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
139 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
140 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
141 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
142 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
143 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
144 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
145 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
146 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
150 void %th_l(<8 x short>* %A, <8 x short>* %B) {
152 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
153 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
154 %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
155 %tmp3 = extractelement <8 x short> %tmp2, uint 4 ; <short> [#uses=1]
156 %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
157 %tmp5 = extractelement <8 x short> %tmp2, uint 5 ; <short> [#uses=1]
158 %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
159 %tmp7 = extractelement <8 x short> %tmp2, uint 6 ; <short> [#uses=1]
160 %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
161 %tmp9 = extractelement <8 x short> %tmp2, uint 7 ; <short> [#uses=1]
162 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
163 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
164 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
165 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
166 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
167 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
168 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
169 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
170 store <8 x short> %tmp17, <8 x short>* %A
174 void %tw_l(<4 x int>* %A, <4 x int>* %B) {
176 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
177 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
178 %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
179 %tmp3 = extractelement <4 x int> %tmp2, uint 2 ; <int> [#uses=1]
180 %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
181 %tmp5 = extractelement <4 x int> %tmp2, uint 3 ; <int> [#uses=1]
182 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
183 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
184 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
185 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
186 store <4 x int> %tmp9, <4 x int>* %A
190 void %tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
192 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
193 %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
194 %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
195 %tmp3 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
196 %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
197 %tmp5 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
198 %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
199 %tmp7 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
200 %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
201 %tmp9 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
202 %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
203 %tmp11 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
204 %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
205 %tmp13 = extractelement <16 x sbyte> %tmp2, uint 5 ; <sbyte> [#uses=1]
206 %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
207 %tmp15 = extractelement <16 x sbyte> %tmp2, uint 6 ; <sbyte> [#uses=1]
208 %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
209 %tmp17 = extractelement <16 x sbyte> %tmp2, uint 7 ; <sbyte> [#uses=1]
210 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
211 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
212 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
213 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
214 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
215 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
216 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
217 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
218 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
219 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
220 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
221 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
222 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
223 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
224 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
225 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
226 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
230 void %th_h(<8 x short>* %A, <8 x short>* %B) {
232 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
233 %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
234 %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
235 %tmp3 = extractelement <8 x short> %tmp2, uint 0 ; <short> [#uses=1]
236 %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
237 %tmp5 = extractelement <8 x short> %tmp2, uint 1 ; <short> [#uses=1]
238 %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
239 %tmp7 = extractelement <8 x short> %tmp2, uint 2 ; <short> [#uses=1]
240 %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
241 %tmp9 = extractelement <8 x short> %tmp2, uint 3 ; <short> [#uses=1]
242 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
243 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
244 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
245 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
246 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
247 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
248 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
249 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
250 store <8 x short> %tmp17, <8 x short>* %A
254 void %tw_h(<4 x int>* %A, <4 x int>* %B) {
256 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
257 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
258 %tmp = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
259 %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
260 %tmp4 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
261 %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
262 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
263 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
264 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
265 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
266 store <4 x int> %tmp9, <4 x int>* %A
270 void %tw_h_flop(<4 x int>* %A, <4 x int>* %B) {
271 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
272 %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
273 %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
274 %tmp3 = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
275 %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
276 %tmp5 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
277 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
278 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
279 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
280 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
281 store <4 x int> %tmp9, <4 x int>* %A
286 void %VMRG_UNARY_tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
288 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
289 %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
290 %tmp3 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
291 %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
292 %tmp5 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
293 %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
294 %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
295 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
296 %tmp9 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
297 %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
298 %tmp11 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
299 %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
300 %tmp13 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
301 %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
302 %tmp15 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
303 %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
304 %tmp17 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
305 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
306 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
307 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
308 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
309 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
310 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
311 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
312 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
313 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
314 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
315 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
316 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
317 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
318 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
319 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
320 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
321 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
325 void %VMRG_UNARY_th_l(<8 x short>* %A, <8 x short>* %B) {
327 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
328 %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
329 %tmp3 = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
330 %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
331 %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
332 %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
333 %tmp7 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
334 %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
335 %tmp9 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
336 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
337 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
338 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
339 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
340 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
341 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
342 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
343 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
344 store <8 x short> %tmp17, <8 x short>* %A
348 void %VMRG_UNARY_tw_l(<4 x int>* %A, <4 x int>* %B) {
350 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
351 %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
352 %tmp3 = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
353 %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
354 %tmp5 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
355 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
356 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
357 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
358 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
359 store <4 x int> %tmp9, <4 x int>* %A
363 void %VMRG_UNARY_tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
365 %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
366 %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
367 %tmp3 = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
368 %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
369 %tmp5 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
370 %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
371 %tmp7 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
372 %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
373 %tmp9 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
374 %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
375 %tmp11 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
376 %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
377 %tmp13 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
378 %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
379 %tmp15 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
380 %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
381 %tmp17 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
382 %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
383 %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
384 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
385 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
386 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
387 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
388 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
389 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
390 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
391 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
392 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
393 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
394 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
395 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
396 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
397 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
398 store <16 x sbyte> %tmp33, <16 x sbyte>* %A
402 void %VMRG_UNARY_th_h(<8 x short>* %A, <8 x short>* %B) {
404 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
405 %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
406 %tmp3 = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
407 %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
408 %tmp5 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
409 %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
410 %tmp7 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
411 %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
412 %tmp9 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
413 %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
414 %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
415 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
416 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
417 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
418 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
419 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
420 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
421 store <8 x short> %tmp17, <8 x short>* %A
425 void %VMRG_UNARY_tw_h(<4 x int>* %A, <4 x int>* %B) {
427 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
428 %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
429 %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
430 %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
431 %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
432 %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
433 %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
434 %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
435 %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
436 store <4 x int> %tmp9, <4 x int>* %A
440 void %VPCKUHUM_unary(<8 x short>* %A, <8 x short>* %B) {
442 %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=2]
443 %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
444 %tmp3 = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
445 %tmp = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
446 %tmp4 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
447 %tmp5 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
448 %tmp6 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
449 %tmp7 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
450 %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
451 %tmp9 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
452 %tmp10 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
453 %tmp11 = extractelement <16 x sbyte> %tmp3, uint 1 ; <sbyte> [#uses=1]
454 %tmp12 = extractelement <16 x sbyte> %tmp3, uint 3 ; <sbyte> [#uses=1]
455 %tmp13 = extractelement <16 x sbyte> %tmp3, uint 5 ; <sbyte> [#uses=1]
456 %tmp14 = extractelement <16 x sbyte> %tmp3, uint 7 ; <sbyte> [#uses=1]
457 %tmp15 = extractelement <16 x sbyte> %tmp3, uint 9 ; <sbyte> [#uses=1]
458 %tmp16 = extractelement <16 x sbyte> %tmp3, uint 11 ; <sbyte> [#uses=1]
459 %tmp17 = extractelement <16 x sbyte> %tmp3, uint 13 ; <sbyte> [#uses=1]
460 %tmp18 = extractelement <16 x sbyte> %tmp3, uint 15 ; <sbyte> [#uses=1]
461 %tmp19 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
462 %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 1 ; <<16 x sbyte>> [#uses=1]
463 %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 2 ; <<16 x sbyte>> [#uses=1]
464 %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 3 ; <<16 x sbyte>> [#uses=1]
465 %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 4 ; <<16 x sbyte>> [#uses=1]
466 %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 5 ; <<16 x sbyte>> [#uses=1]
467 %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 6 ; <<16 x sbyte>> [#uses=1]
468 %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 7 ; <<16 x sbyte>> [#uses=1]
469 %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 8 ; <<16 x sbyte>> [#uses=1]
470 %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 9 ; <<16 x sbyte>> [#uses=1]
471 %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 10 ; <<16 x sbyte>> [#uses=1]
472 %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 11 ; <<16 x sbyte>> [#uses=1]
473 %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 12 ; <<16 x sbyte>> [#uses=1]
474 %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 13 ; <<16 x sbyte>> [#uses=1]
475 %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 14 ; <<16 x sbyte>> [#uses=1]
476 %tmp34 = insertelement <16 x sbyte> %tmp33, sbyte %tmp18, uint 15 ; <<16 x sbyte>> [#uses=1]
477 %tmp34 = cast <16 x sbyte> %tmp34 to <8 x short> ; <<8 x short>> [#uses=1]
478 store <8 x short> %tmp34, <8 x short>* %A
482 void %VPCKUWUM_unary(<4 x int>* %A, <4 x int>* %B) {
484 %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
485 %tmp = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
486 %tmp3 = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
487 %tmp = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
488 %tmp4 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
489 %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
490 %tmp6 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
491 %tmp7 = extractelement <8 x short> %tmp3, uint 1 ; <short> [#uses=1]
492 %tmp8 = extractelement <8 x short> %tmp3, uint 3 ; <short> [#uses=1]
493 %tmp9 = extractelement <8 x short> %tmp3, uint 5 ; <short> [#uses=1]
494 %tmp10 = extractelement <8 x short> %tmp3, uint 7 ; <short> [#uses=1]
495 %tmp11 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
496 %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 1 ; <<8 x short>> [#uses=1]
497 %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 2 ; <<8 x short>> [#uses=1]
498 %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 3 ; <<8 x short>> [#uses=1]
499 %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 4 ; <<8 x short>> [#uses=1]
500 %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 5 ; <<8 x short>> [#uses=1]
501 %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 6 ; <<8 x short>> [#uses=1]
502 %tmp18 = insertelement <8 x short> %tmp17, short %tmp10, uint 7 ; <<8 x short>> [#uses=1]
503 %tmp18 = cast <8 x short> %tmp18 to <4 x int> ; <<4 x int>> [#uses=1]
504 store <4 x int> %tmp18, <4 x int>* %A