1 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
2 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
4 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
; test1: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.ssse3.phadd.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
6 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
9 %0 = bitcast <1 x i64> %b to <4 x i16>
10 %1 = bitcast <1 x i64> %a to <4 x i16>
11 %2 = bitcast <4 x i16> %1 to x86_mmx
12 %3 = bitcast <4 x i16> %0 to x86_mmx
13 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
14 %5 = bitcast x86_mmx %4 to <4 x i16>
15 %6 = bitcast <4 x i16> %5 to <1 x i64>
16 %7 = extractelement <1 x i64> %6, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
20 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
; test88: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpgt.d with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
22 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
25 %0 = bitcast <1 x i64> %b to <2 x i32>
26 %1 = bitcast <1 x i64> %a to <2 x i32>
27 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
28 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
29 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
30 %3 = bitcast x86_mmx %2 to <2 x i32>
31 %4 = bitcast <2 x i32> %3 to <1 x i64>
32 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
36 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
; test87: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpgt.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
38 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
41 %0 = bitcast <1 x i64> %b to <4 x i16>
42 %1 = bitcast <1 x i64> %a to <4 x i16>
43 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
44 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
45 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
46 %3 = bitcast x86_mmx %2 to <4 x i16>
47 %4 = bitcast <4 x i16> %3 to <1 x i64>
48 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
52 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
; test86: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpgt.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
54 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
57 %0 = bitcast <1 x i64> %b to <8 x i8>
58 %1 = bitcast <1 x i64> %a to <8 x i8>
59 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
60 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
61 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
62 %3 = bitcast x86_mmx %2 to <8 x i8>
63 %4 = bitcast <8 x i8> %3 to <1 x i64>
64 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
68 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
; test85: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpeq.d with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
70 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
73 %0 = bitcast <1 x i64> %b to <2 x i32>
74 %1 = bitcast <1 x i64> %a to <2 x i32>
75 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
76 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
77 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
78 %3 = bitcast x86_mmx %2 to <2 x i32>
79 %4 = bitcast <2 x i32> %3 to <1 x i64>
80 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
84 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
; test84: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpeq.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
86 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
89 %0 = bitcast <1 x i64> %b to <4 x i16>
90 %1 = bitcast <1 x i64> %a to <4 x i16>
91 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
92 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
93 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
94 %3 = bitcast x86_mmx %2 to <4 x i16>
95 %4 = bitcast <4 x i16> %3 to <1 x i64>
96 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
100 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
; test83: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pcmpeq.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
102 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
105 %0 = bitcast <1 x i64> %b to <8 x i8>
106 %1 = bitcast <1 x i64> %a to <8 x i8>
107 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
108 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
109 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
110 %3 = bitcast x86_mmx %2 to <8 x i8>
111 %4 = bitcast <8 x i8> %3 to <1 x i64>
112 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
116 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
; test82: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpckldq with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
118 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
121 %0 = bitcast <1 x i64> %b to <2 x i32>
122 %1 = bitcast <1 x i64> %a to <2 x i32>
123 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
124 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
125 %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
126 %3 = bitcast x86_mmx %2 to <2 x i32>
127 %4 = bitcast <2 x i32> %3 to <1 x i64>
128 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
132 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
; test81: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpcklwd with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
134 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
137 %0 = bitcast <1 x i64> %b to <4 x i16>
138 %1 = bitcast <1 x i64> %a to <4 x i16>
139 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
140 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
141 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
142 %3 = bitcast x86_mmx %2 to <4 x i16>
143 %4 = bitcast <4 x i16> %3 to <1 x i64>
144 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
148 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
; test80: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpcklbw with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
150 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
153 %0 = bitcast <1 x i64> %b to <8 x i8>
154 %1 = bitcast <1 x i64> %a to <8 x i8>
155 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
156 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
157 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
158 %3 = bitcast x86_mmx %2 to <8 x i8>
159 %4 = bitcast <8 x i8> %3 to <1 x i64>
160 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
164 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
; test79: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpckhdq with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
166 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
169 %0 = bitcast <1 x i64> %b to <2 x i32>
170 %1 = bitcast <1 x i64> %a to <2 x i32>
171 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
172 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
173 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
174 %3 = bitcast x86_mmx %2 to <2 x i32>
175 %4 = bitcast <2 x i32> %3 to <1 x i64>
176 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
180 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
; test78: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpckhwd with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
182 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
185 %0 = bitcast <1 x i64> %b to <4 x i16>
186 %1 = bitcast <1 x i64> %a to <4 x i16>
187 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
188 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
189 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
190 %3 = bitcast x86_mmx %2 to <4 x i16>
191 %4 = bitcast <4 x i16> %3 to <1 x i64>
192 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
196 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
; test77: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.punpckhbw with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
198 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
201 %0 = bitcast <1 x i64> %b to <8 x i8>
202 %1 = bitcast <1 x i64> %a to <8 x i8>
203 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
204 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
205 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
206 %3 = bitcast x86_mmx %2 to <8 x i8>
207 %4 = bitcast <8 x i8> %3 to <1 x i64>
208 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
212 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
; test76: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.packuswb with %a's value first and %b's value second.
; Unlike the inputs, the result is viewed as <8 x i8> before being converted
; to <1 x i64>, from which element 0 is extracted.
214 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
217 %0 = bitcast <1 x i64> %b to <4 x i16>
218 %1 = bitcast <1 x i64> %a to <4 x i16>
219 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
220 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
221 %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
222 %3 = bitcast x86_mmx %2 to <8 x i8>
223 %4 = bitcast <8 x i8> %3 to <1 x i64>
224 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
228 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
; test75: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.packssdw with %a's value first and %b's value second.
; Unlike the inputs, the result is viewed as <4 x i16> before being converted
; to <1 x i64>, from which element 0 is extracted.
230 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
233 %0 = bitcast <1 x i64> %b to <2 x i32>
234 %1 = bitcast <1 x i64> %a to <2 x i32>
235 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
236 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
237 %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
238 %3 = bitcast x86_mmx %2 to <4 x i16>
239 %4 = bitcast <4 x i16> %3 to <1 x i64>
240 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
244 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
; test74: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.packsswb with %a's value first and %b's value second.
; Unlike the inputs, the result is viewed as <8 x i8> before being converted
; to <1 x i64>, from which element 0 is extracted.
246 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
249 %0 = bitcast <1 x i64> %b to <4 x i16>
250 %1 = bitcast <1 x i64> %a to <4 x i16>
251 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
252 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
253 %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
254 %3 = bitcast x86_mmx %2 to <8 x i8>
255 %4 = bitcast <8 x i8> %3 to <1 x i64>
256 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
260 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
; test73: reinterprets %a as <2 x i32>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.psrai.d with the constant i32 3 as the second operand.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
262 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
265 %0 = bitcast <1 x i64> %a to <2 x i32>
266 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
267 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
268 %2 = bitcast x86_mmx %1 to <2 x i32>
269 %3 = bitcast <2 x i32> %2 to <1 x i64>
270 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
274 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
; test72: reinterprets %a as <4 x i16>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.psrai.w with the constant i32 3 as the second operand.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
276 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
279 %0 = bitcast <1 x i64> %a to <4 x i16>
280 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
281 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
282 %2 = bitcast x86_mmx %1 to <4 x i16>
283 %3 = bitcast <4 x i16> %2 to <1 x i64>
284 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
288 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
; test71: extracts the single i64 element of %a, bitcasts it to x86_mmx, and
; calls @llvm.x86.mmx.psrli.q with the constant i32 3 as the second operand.
; The x86_mmx result is bitcast directly to i64 (no intermediate vector type).
290 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
293 %0 = extractelement <1 x i64> %a, i32 0
294 %mmx_var.i = bitcast i64 %0 to x86_mmx
295 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
296 %2 = bitcast x86_mmx %1 to i64
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
300 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
; test70: reinterprets %a as <2 x i32>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.psrli.d with the constant i32 3 as the second operand.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
302 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
305 %0 = bitcast <1 x i64> %a to <2 x i32>
306 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
307 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
308 %2 = bitcast x86_mmx %1 to <2 x i32>
309 %3 = bitcast <2 x i32> %2 to <1 x i64>
310 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
314 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
; test69: reinterprets %a as <4 x i16>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.psrli.w with the constant i32 3 as the second operand.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
316 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
319 %0 = bitcast <1 x i64> %a to <4 x i16>
320 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
321 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
322 %2 = bitcast x86_mmx %1 to <4 x i16>
323 %3 = bitcast <4 x i16> %2 to <1 x i64>
324 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
328 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
; test68: extracts the single i64 element of %a, bitcasts it to x86_mmx, and
; calls @llvm.x86.mmx.pslli.q with the constant i32 3 as the second operand.
; The x86_mmx result is bitcast directly to i64 (no intermediate vector type).
330 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
333 %0 = extractelement <1 x i64> %a, i32 0
334 %mmx_var.i = bitcast i64 %0 to x86_mmx
335 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
336 %2 = bitcast x86_mmx %1 to i64
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
340 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
; test67: reinterprets %a as <2 x i32>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.pslli.d with the constant i32 3 as the second operand.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
342 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
345 %0 = bitcast <1 x i64> %a to <2 x i32>
346 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
347 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
348 %2 = bitcast x86_mmx %1 to <2 x i32>
349 %3 = bitcast <2 x i32> %2 to <1 x i64>
350 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, i32) -> x86_mmx.
354 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
; test66: reinterprets %a as <4 x i16>, converts it to x86_mmx, and calls
; @llvm.x86.mmx.pslli.w with the constant i32 3 as the second operand.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
356 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
359 %0 = bitcast <1 x i64> %a to <4 x i16>
360 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
361 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
362 %2 = bitcast x86_mmx %1 to <4 x i16>
363 %3 = bitcast <4 x i16> %2 to <1 x i64>
364 %4 = extractelement <1 x i64> %3, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
368 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
; test65: the first operand is %a viewed as <2 x i32> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psra.d, views the result as <2 x i32>, then <1 x i64>, and
; extracts element 0.
370 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
373 %0 = bitcast <1 x i64> %a to <2 x i32>
374 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
375 %1 = extractelement <1 x i64> %b, i32 0
376 %mmx_var1.i = bitcast i64 %1 to x86_mmx
377 %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
378 %3 = bitcast x86_mmx %2 to <2 x i32>
379 %4 = bitcast <2 x i32> %3 to <1 x i64>
380 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
384 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
; test64: the first operand is %a viewed as <4 x i16> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psra.w, views the result as <4 x i16>, then <1 x i64>, and
; extracts element 0.
386 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
389 %0 = bitcast <1 x i64> %a to <4 x i16>
390 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
391 %1 = extractelement <1 x i64> %b, i32 0
392 %mmx_var1.i = bitcast i64 %1 to x86_mmx
393 %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
394 %3 = bitcast x86_mmx %2 to <4 x i16>
395 %4 = bitcast <4 x i16> %3 to <1 x i64>
396 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
400 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
; test63: extracts the single i64 element from each of %a and %b, bitcasts both
; to x86_mmx, and calls @llvm.x86.mmx.psrl.q with %a's value first and %b's
; second. The x86_mmx result is bitcast directly to i64.
402 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
405 %0 = extractelement <1 x i64> %a, i32 0
406 %mmx_var.i = bitcast i64 %0 to x86_mmx
407 %1 = extractelement <1 x i64> %b, i32 0
408 %mmx_var1.i = bitcast i64 %1 to x86_mmx
409 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
410 %3 = bitcast x86_mmx %2 to i64
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
414 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
; test62: the first operand is %a viewed as <2 x i32> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psrl.d, views the result as <2 x i32>, then <1 x i64>, and
; extracts element 0.
416 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
419 %0 = bitcast <1 x i64> %a to <2 x i32>
420 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
421 %1 = extractelement <1 x i64> %b, i32 0
422 %mmx_var1.i = bitcast i64 %1 to x86_mmx
423 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
424 %3 = bitcast x86_mmx %2 to <2 x i32>
425 %4 = bitcast <2 x i32> %3 to <1 x i64>
426 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
430 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
; test61: the first operand is %a viewed as <4 x i16> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psrl.w, views the result as <4 x i16>, then <1 x i64>, and
; extracts element 0.
432 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
435 %0 = bitcast <1 x i64> %a to <4 x i16>
436 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
437 %1 = extractelement <1 x i64> %b, i32 0
438 %mmx_var1.i = bitcast i64 %1 to x86_mmx
439 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
440 %3 = bitcast x86_mmx %2 to <4 x i16>
441 %4 = bitcast <4 x i16> %3 to <1 x i64>
442 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
446 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
; test60: extracts the single i64 element from each of %a and %b, bitcasts both
; to x86_mmx, and calls @llvm.x86.mmx.psll.q with %a's value first and %b's
; second. The x86_mmx result is bitcast directly to i64.
448 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
451 %0 = extractelement <1 x i64> %a, i32 0
452 %mmx_var.i = bitcast i64 %0 to x86_mmx
453 %1 = extractelement <1 x i64> %b, i32 0
454 %mmx_var1.i = bitcast i64 %1 to x86_mmx
455 %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
456 %3 = bitcast x86_mmx %2 to i64
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
460 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
; test59: the first operand is %a viewed as <2 x i32> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psll.d, views the result as <2 x i32>, then <1 x i64>, and
; extracts element 0.
462 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
465 %0 = bitcast <1 x i64> %a to <2 x i32>
466 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
467 %1 = extractelement <1 x i64> %b, i32 0
468 %mmx_var1.i = bitcast i64 %1 to x86_mmx
469 %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
470 %3 = bitcast x86_mmx %2 to <2 x i32>
471 %4 = bitcast <2 x i32> %3 to <1 x i64>
472 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
476 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
; test58: the first operand is %a viewed as <4 x i16> and converted to x86_mmx;
; the second is %b's single i64 element bitcast straight to x86_mmx. Calls
; @llvm.x86.mmx.psll.w, views the result as <4 x i16>, then <1 x i64>, and
; extracts element 0.
478 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
481 %0 = bitcast <1 x i64> %a to <4 x i16>
482 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
483 %1 = extractelement <1 x i64> %b, i32 0
484 %mmx_var1.i = bitcast i64 %1 to x86_mmx
485 %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
486 %3 = bitcast x86_mmx %2 to <4 x i16>
487 %4 = bitcast <4 x i16> %3 to <1 x i64>
488 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
492 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
; test56: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pxor with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
494 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
497 %0 = bitcast <1 x i64> %b to <2 x i32>
498 %1 = bitcast <1 x i64> %a to <2 x i32>
499 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
500 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
501 %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
502 %3 = bitcast x86_mmx %2 to <2 x i32>
503 %4 = bitcast <2 x i32> %3 to <1 x i64>
504 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
508 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
; test55: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.por with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
510 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
513 %0 = bitcast <1 x i64> %b to <2 x i32>
514 %1 = bitcast <1 x i64> %a to <2 x i32>
515 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
516 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
517 %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
518 %3 = bitcast x86_mmx %2 to <2 x i32>
519 %4 = bitcast <2 x i32> %3 to <1 x i64>
520 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
524 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
; test54: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pandn with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
526 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
529 %0 = bitcast <1 x i64> %b to <2 x i32>
530 %1 = bitcast <1 x i64> %a to <2 x i32>
531 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
532 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
533 %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
534 %3 = bitcast x86_mmx %2 to <2 x i32>
535 %4 = bitcast <2 x i32> %3 to <1 x i64>
536 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
540 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
; test53: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pand with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
542 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
545 %0 = bitcast <1 x i64> %b to <2 x i32>
546 %1 = bitcast <1 x i64> %a to <2 x i32>
547 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
548 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
549 %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
550 %3 = bitcast x86_mmx %2 to <2 x i32>
551 %4 = bitcast <2 x i32> %3 to <1 x i64>
552 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
556 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
; test52: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmull.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
558 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
561 %0 = bitcast <1 x i64> %b to <4 x i16>
562 %1 = bitcast <1 x i64> %a to <4 x i16>
563 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
564 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
565 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
566 %3 = bitcast x86_mmx %2 to <4 x i16>
567 %4 = bitcast <4 x i16> %3 to <1 x i64>
568 %5 = extractelement <1 x i64> %4, i32 0
; test51: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmull.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
; NOTE(review): the visible instructions match test52 exactly (same intrinsic,
; same types); what distinguishes the two tests is not visible here - confirm.
572 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
575 %0 = bitcast <1 x i64> %b to <4 x i16>
576 %1 = bitcast <1 x i64> %a to <4 x i16>
577 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
578 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
579 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
580 %3 = bitcast x86_mmx %2 to <4 x i16>
581 %4 = bitcast <4 x i16> %3 to <1 x i64>
582 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
586 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
; test50: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmulh.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
588 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
591 %0 = bitcast <1 x i64> %b to <4 x i16>
592 %1 = bitcast <1 x i64> %a to <4 x i16>
593 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
594 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
595 %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
596 %3 = bitcast x86_mmx %2 to <4 x i16>
597 %4 = bitcast <4 x i16> %3 to <1 x i64>
598 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
602 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
; test49: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmadd.wd with %a's value first and %b's value second.
; Unlike the inputs, the result is viewed as <2 x i32> before being converted
; to <1 x i64>, from which element 0 is extracted.
604 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
607 %0 = bitcast <1 x i64> %b to <4 x i16>
608 %1 = bitcast <1 x i64> %a to <4 x i16>
609 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
610 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
611 %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
612 %3 = bitcast x86_mmx %2 to <2 x i32>
613 %4 = bitcast <2 x i32> %3 to <1 x i64>
614 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
618 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
; test48: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psubus.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
620 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
623 %0 = bitcast <1 x i64> %b to <4 x i16>
624 %1 = bitcast <1 x i64> %a to <4 x i16>
625 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
626 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
627 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
628 %3 = bitcast x86_mmx %2 to <4 x i16>
629 %4 = bitcast <4 x i16> %3 to <1 x i64>
630 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
634 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
; test47: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psubus.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
636 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
639 %0 = bitcast <1 x i64> %b to <8 x i8>
640 %1 = bitcast <1 x i64> %a to <8 x i8>
641 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
642 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
643 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
644 %3 = bitcast x86_mmx %2 to <8 x i8>
645 %4 = bitcast <8 x i8> %3 to <1 x i64>
646 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
650 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
; test46: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psubs.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
652 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
655 %0 = bitcast <1 x i64> %b to <4 x i16>
656 %1 = bitcast <1 x i64> %a to <4 x i16>
657 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
658 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
659 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
660 %3 = bitcast x86_mmx %2 to <4 x i16>
661 %4 = bitcast <4 x i16> %3 to <1 x i64>
662 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
666 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
; test45: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psubs.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
668 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
671 %0 = bitcast <1 x i64> %b to <8 x i8>
672 %1 = bitcast <1 x i64> %a to <8 x i8>
673 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
674 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
675 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
676 %3 = bitcast x86_mmx %2 to <8 x i8>
677 %4 = bitcast <8 x i8> %3 to <1 x i64>
678 %5 = extractelement <1 x i64> %4, i32 0
; test44: extracts the single i64 element from each of %a and %b, bitcasts both
; to x86_mmx, and calls @llvm.x86.mmx.psub.q with %a's value first and %b's
; second. The x86_mmx result is bitcast directly to i64. The call site carries
; no attributes, and the intrinsic's declaration follows the body instead of
; preceding it.
682 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
685 %0 = extractelement <1 x i64> %a, i32 0
686 %mmx_var = bitcast i64 %0 to x86_mmx
687 %1 = extractelement <1 x i64> %b, i32 0
688 %mmx_var1 = bitcast i64 %1 to x86_mmx
689 %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
690 %3 = bitcast x86_mmx %2 to i64
; Declares the intrinsic called above: (x86_mmx, x86_mmx) -> x86_mmx.
694 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
696 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
; test43: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psub.d with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
698 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
701 %0 = bitcast <1 x i64> %b to <2 x i32>
702 %1 = bitcast <1 x i64> %a to <2 x i32>
703 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
704 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
705 %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
706 %3 = bitcast x86_mmx %2 to <2 x i32>
707 %4 = bitcast <2 x i32> %3 to <1 x i64>
708 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
712 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
; test42: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psub.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
714 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
717 %0 = bitcast <1 x i64> %b to <4 x i16>
718 %1 = bitcast <1 x i64> %a to <4 x i16>
719 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
720 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
721 %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
722 %3 = bitcast x86_mmx %2 to <4 x i16>
723 %4 = bitcast <4 x i16> %3 to <1 x i64>
724 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
728 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
; test41: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psub.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
730 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
733 %0 = bitcast <1 x i64> %b to <8 x i8>
734 %1 = bitcast <1 x i64> %a to <8 x i8>
735 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
736 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
737 %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
738 %3 = bitcast x86_mmx %2 to <8 x i8>
739 %4 = bitcast <8 x i8> %3 to <1 x i64>
740 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
744 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
; test40: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.paddus.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
746 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
749 %0 = bitcast <1 x i64> %b to <4 x i16>
750 %1 = bitcast <1 x i64> %a to <4 x i16>
751 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
752 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
753 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
754 %3 = bitcast x86_mmx %2 to <4 x i16>
755 %4 = bitcast <4 x i16> %3 to <1 x i64>
756 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
760 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
; test39: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.paddus.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
762 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
765 %0 = bitcast <1 x i64> %b to <8 x i8>
766 %1 = bitcast <1 x i64> %a to <8 x i8>
767 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
768 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
769 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
770 %3 = bitcast x86_mmx %2 to <8 x i8>
771 %4 = bitcast <8 x i8> %3 to <1 x i64>
772 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
776 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
; test38: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.padds.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
778 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
781 %0 = bitcast <1 x i64> %b to <4 x i16>
782 %1 = bitcast <1 x i64> %a to <4 x i16>
783 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
784 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
785 %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
786 %3 = bitcast x86_mmx %2 to <4 x i16>
787 %4 = bitcast <4 x i16> %3 to <1 x i64>
788 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
792 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
; test37: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.padds.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
794 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
797 %0 = bitcast <1 x i64> %b to <8 x i8>
798 %1 = bitcast <1 x i64> %a to <8 x i8>
799 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
800 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
801 %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
802 %3 = bitcast x86_mmx %2 to <8 x i8>
803 %4 = bitcast <8 x i8> %3 to <1 x i64>
804 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
808 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
; test36: extracts the single i64 element from each of %a and %b, bitcasts both
; to x86_mmx, and calls @llvm.x86.mmx.padd.q with %a's value first and %b's
; second. The x86_mmx result is bitcast directly to i64. The call site carries
; no attributes.
810 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
813 %0 = extractelement <1 x i64> %a, i32 0
814 %mmx_var = bitcast i64 %0 to x86_mmx
815 %1 = extractelement <1 x i64> %b, i32 0
816 %mmx_var1 = bitcast i64 %1 to x86_mmx
817 %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
818 %3 = bitcast x86_mmx %2 to i64
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
822 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
; test35: reinterprets %a and %b as <2 x i32>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.padd.d with %a's value first and %b's value second.
; The result is viewed as <2 x i32>, then <1 x i64>, and element 0 is extracted.
824 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
827 %0 = bitcast <1 x i64> %b to <2 x i32>
828 %1 = bitcast <1 x i64> %a to <2 x i32>
829 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
830 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
831 %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
832 %3 = bitcast x86_mmx %2 to <2 x i32>
833 %4 = bitcast <2 x i32> %3 to <1 x i64>
834 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
838 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
; test34: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.padd.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
840 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
843 %0 = bitcast <1 x i64> %b to <4 x i16>
844 %1 = bitcast <1 x i64> %a to <4 x i16>
845 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
846 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
847 %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
848 %3 = bitcast x86_mmx %2 to <4 x i16>
849 %4 = bitcast <4 x i16> %3 to <1 x i64>
850 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
854 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
; test33: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.padd.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
856 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
859 %0 = bitcast <1 x i64> %b to <8 x i8>
860 %1 = bitcast <1 x i64> %a to <8 x i8>
861 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
862 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
863 %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
864 %3 = bitcast x86_mmx %2 to <8 x i8>
865 %4 = bitcast <8 x i8> %3 to <1 x i64>
866 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
870 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
; test32: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.psad.bw with %a's value first and %b's value second.
; The x86_mmx result is bitcast directly to i64, with no vector round-trip.
872 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
875 %0 = bitcast <1 x i64> %b to <8 x i8>
876 %1 = bitcast <1 x i64> %a to <8 x i8>
877 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
878 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
879 %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
880 %3 = bitcast x86_mmx %2 to i64
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
884 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
; test31: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmins.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
886 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
889 %0 = bitcast <1 x i64> %b to <4 x i16>
890 %1 = bitcast <1 x i64> %a to <4 x i16>
891 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
892 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
893 %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
894 %3 = bitcast x86_mmx %2 to <4 x i16>
895 %4 = bitcast <4 x i16> %3 to <1 x i64>
896 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
900 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
; test30: reinterprets %a and %b as <8 x i8>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pminu.b with %a's value first and %b's value second.
; The result is viewed as <8 x i8>, then <1 x i64>, and element 0 is extracted.
902 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
905 %0 = bitcast <1 x i64> %b to <8 x i8>
906 %1 = bitcast <1 x i64> %a to <8 x i8>
907 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
908 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
909 %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
910 %3 = bitcast x86_mmx %2 to <8 x i8>
911 %4 = bitcast <8 x i8> %3 to <1 x i64>
912 %5 = extractelement <1 x i64> %4, i32 0
; Declares the intrinsic called below: (x86_mmx, x86_mmx) -> x86_mmx.
916 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
; test29: reinterprets %a and %b as <4 x i16>, converts both to x86_mmx, and
; calls @llvm.x86.mmx.pmaxs.w with %a's value first and %b's value second.
; The result is viewed as <4 x i16>, then <1 x i64>, and element 0 is extracted.
918 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
921 %0 = bitcast <1 x i64> %b to <4 x i16>
922 %1 = bitcast <1 x i64> %a to <4 x i16>
923 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
924 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
925 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
926 %3 = bitcast x86_mmx %2 to <4 x i16>
927 %4 = bitcast <4 x i16> %3 to <1 x i64>
928 %5 = extractelement <1 x i64> %4, i32 0
; test28: exercises the llvm.x86.mmx.pmaxu.b intrinsic on two x86_mmx operands.
932 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
934 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
937 %0 = bitcast <1 x i64> %b to <8 x i8>
938 %1 = bitcast <1 x i64> %a to <8 x i8>
939 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
940 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
; Call operands are (%a, %b) in that order.
941 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Result goes back x86_mmx -> <8 x i8> -> <1 x i64>; lane 0 is read as i64.
942 %3 = bitcast x86_mmx %2 to <8 x i8>
943 %4 = bitcast <8 x i8> %3 to <1 x i64>
944 %5 = extractelement <1 x i64> %4, i32 0
; test27: exercises the llvm.x86.mmx.pavg.w intrinsic on two x86_mmx operands.
948 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
950 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
953 %0 = bitcast <1 x i64> %b to <4 x i16>
954 %1 = bitcast <1 x i64> %a to <4 x i16>
955 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
956 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
; Call operands are (%a, %b) in that order.
957 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
958 %3 = bitcast x86_mmx %2 to <4 x i16>
959 %4 = bitcast <4 x i16> %3 to <1 x i64>
960 %5 = extractelement <1 x i64> %4, i32 0
; test26: exercises the llvm.x86.mmx.pavg.b intrinsic on two x86_mmx operands.
964 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
966 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
969 %0 = bitcast <1 x i64> %b to <8 x i8>
970 %1 = bitcast <1 x i64> %a to <8 x i8>
971 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
972 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
; Call operands are (%a, %b) in that order.
973 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Result goes back x86_mmx -> <8 x i8> -> <1 x i64>; lane 0 is read as i64.
974 %3 = bitcast x86_mmx %2 to <8 x i8>
975 %4 = bitcast <8 x i8> %3 to <1 x i64>
976 %5 = extractelement <1 x i64> %4, i32 0
; test25: exercises the llvm.x86.mmx.movnt.dq intrinsic, which takes an
; x86_mmx* and an x86_mmx value and returns void. The declaration is nounwind
; only (no readnone), unlike the value-producing intrinsics around it.
980 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
; The function itself is void and likewise omits readnone.
982 define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; Reinterpret the destination pointer as x86_mmx*.
985 %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
; Pull the single i64 lane out of %a and relabel it as x86_mmx.
986 %0 = extractelement <1 x i64> %a, i32 0
987 %mmx_var.i = bitcast i64 %0 to x86_mmx
; Operands are (pointer, value).
988 tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
; test24: exercises the llvm.x86.mmx.pmovmskb intrinsic, which takes one
; x86_mmx operand and produces an i32 (not an x86_mmx).
992 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
; Single-argument test; the function's result type is i32 to match.
994 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; %a is relabelled <8 x i8> and then x86_mmx; bitcasts only.
997 %0 = bitcast <1 x i64> %a to <8 x i8>
998 %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
; The i32 result needs no conversion back from x86_mmx.
999 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
; test23: exercises the llvm.x86.mmx.maskmovq intrinsic, which takes two
; x86_mmx operands plus an i8* and returns void. Declared nounwind only.
1003 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
; Void function taking %d, %n and the raw byte pointer %p.
1005 define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; %n and %d are relabelled <8 x i8> and then x86_mmx; bitcasts only.
1008 %0 = bitcast <1 x i64> %n to <8 x i8>
1009 %1 = bitcast <1 x i64> %d to <8 x i8>
1010 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1011 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
; Operand order is (%d, %n, %p); %p is passed through unchanged.
1012 tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
; test22: exercises the llvm.x86.mmx.pmulhu.w intrinsic on two x86_mmx operands.
1016 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1018 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1021 %0 = bitcast <1 x i64> %b to <4 x i16>
1022 %1 = bitcast <1 x i64> %a to <4 x i16>
1023 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1024 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
; Call operands are (%a, %b) in that order.
1025 %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1026 %3 = bitcast x86_mmx %2 to <4 x i16>
1027 %4 = bitcast <4 x i16> %3 to <1 x i64>
1028 %5 = extractelement <1 x i64> %4, i32 0
; test21: exercises llvm.x86.sse.pshuf.w. Note the intrinsic lives in the
; "sse" namespace even though it operates on x86_mmx; its second operand is i8.
1032 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
; Single vector argument; the i8 operand is supplied as a constant below.
1034 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1037 %0 = bitcast <1 x i64> %a to <4 x i16>
1038 %1 = bitcast <4 x i16> %0 to x86_mmx
; The i8 operand is the literal 3.
1039 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1040 %3 = bitcast x86_mmx %2 to <4 x i16>
1041 %4 = bitcast <4 x i16> %3 to <1 x i64>
1042 %5 = extractelement <1 x i64> %4, i32 0
; test20: exercises the llvm.x86.mmx.pmulu.dq intrinsic on two x86_mmx operands.
1046 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <2 x i32> -> x86_mmx via bitcasts only.
1048 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1051 %0 = bitcast <1 x i64> %b to <2 x i32>
1052 %1 = bitcast <1 x i64> %a to <2 x i32>
1053 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1054 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
; Call operands are (%a, %b) in that order.
1055 %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; The result is cast straight to i64 with no intermediate vector type.
1056 %3 = bitcast x86_mmx %2 to i64
; test19: exercises llvm.x86.sse.cvtpi2pd, which takes one x86_mmx operand and
; produces a <2 x double>.
1060 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
; The function's result type is <2 x double> to match the intrinsic.
1062 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; %a is relabelled <2 x i32> and then x86_mmx; bitcasts only.
1065 %0 = bitcast <1 x i64> %a to <2 x i32>
1066 %1 = bitcast <2 x i32> %0 to x86_mmx
1067 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
; test18: exercises llvm.x86.sse.cvttpd2pi, which takes a <2 x double> and
; produces an x86_mmx value.
1071 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
; The <2 x double> argument is passed to the intrinsic directly, with no casts.
1073 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1076 %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
; Result goes x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1077 %1 = bitcast x86_mmx %0 to <2 x i32>
1078 %2 = bitcast <2 x i32> %1 to <1 x i64>
1079 %3 = extractelement <1 x i64> %2, i32 0
; test17: exercises llvm.x86.sse.cvtpd2pi, which takes a <2 x double> and
; produces an x86_mmx value. Same shape as the cvttpd2pi test, different
; intrinsic.
1083 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
; The <2 x double> argument is passed to the intrinsic directly, with no casts.
1085 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1088 %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
; Result goes x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1089 %1 = bitcast x86_mmx %0 to <2 x i32>
1090 %2 = bitcast <2 x i32> %1 to <1 x i64>
1091 %3 = extractelement <1 x i64> %2, i32 0
; test16: exercises llvm.x86.mmx.palignr.b, which takes two x86_mmx operands
; and an i8 operand and produces an x86_mmx value.
1095 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
; Here each argument is converted by extracting its single i64 lane and
; bitcasting that scalar to x86_mmx, rather than going through a vector type.
1097 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1100 %0 = extractelement <1 x i64> %a, i32 0
1101 %mmx_var = bitcast i64 %0 to x86_mmx
1102 %1 = extractelement <1 x i64> %b, i32 0
1103 %mmx_var1 = bitcast i64 %1 to x86_mmx
; Operands are (%a, %b, constant 16).
; NOTE(review): this call site has no nounwind attribute, unlike the other
; intrinsic calls nearby; the declaration is still nounwind readnone. Confirm
; whether the omission is intentional.
1104 %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
; The result is cast straight to i64.
1105 %3 = bitcast x86_mmx %2 to i64
; test15: exercises the unary llvm.x86.ssse3.pabs.d intrinsic (one x86_mmx
; operand, x86_mmx result).
1109 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
; %a travels <1 x i64> -> <2 x i32> -> x86_mmx via bitcasts only.
1111 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1114 %0 = bitcast <1 x i64> %a to <2 x i32>
1115 %1 = bitcast <2 x i32> %0 to x86_mmx
1116 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
; Result goes back x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1117 %3 = bitcast x86_mmx %2 to <2 x i32>
1118 %4 = bitcast <2 x i32> %3 to <1 x i64>
1119 %5 = extractelement <1 x i64> %4, i32 0
; test14: exercises the unary llvm.x86.ssse3.pabs.w intrinsic (one x86_mmx
; operand, x86_mmx result).
1123 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
; %a travels <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1125 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1128 %0 = bitcast <1 x i64> %a to <4 x i16>
1129 %1 = bitcast <4 x i16> %0 to x86_mmx
1130 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1131 %3 = bitcast x86_mmx %2 to <4 x i16>
1132 %4 = bitcast <4 x i16> %3 to <1 x i64>
1133 %5 = extractelement <1 x i64> %4, i32 0
; test13: exercises the unary llvm.x86.ssse3.pabs.b intrinsic (one x86_mmx
; operand, x86_mmx result).
1137 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
; %a travels <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
1139 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1142 %0 = bitcast <1 x i64> %a to <8 x i8>
1143 %1 = bitcast <8 x i8> %0 to x86_mmx
1144 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
; Result goes back x86_mmx -> <8 x i8> -> <1 x i64>; lane 0 is read as i64.
1145 %3 = bitcast x86_mmx %2 to <8 x i8>
1146 %4 = bitcast <8 x i8> %3 to <1 x i64>
1147 %5 = extractelement <1 x i64> %4, i32 0
; test12: exercises the llvm.x86.ssse3.psign.d intrinsic on two x86_mmx operands.
1151 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <2 x i32> -> x86_mmx via bitcasts only.
1153 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1156 %0 = bitcast <1 x i64> %b to <2 x i32>
1157 %1 = bitcast <1 x i64> %a to <2 x i32>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1158 %2 = bitcast <2 x i32> %1 to x86_mmx
1159 %3 = bitcast <2 x i32> %0 to x86_mmx
1160 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1161 %5 = bitcast x86_mmx %4 to <2 x i32>
1162 %6 = bitcast <2 x i32> %5 to <1 x i64>
1163 %7 = extractelement <1 x i64> %6, i32 0
; test11: exercises the llvm.x86.ssse3.psign.w intrinsic on two x86_mmx operands.
1167 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1169 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1172 %0 = bitcast <1 x i64> %b to <4 x i16>
1173 %1 = bitcast <1 x i64> %a to <4 x i16>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1174 %2 = bitcast <4 x i16> %1 to x86_mmx
1175 %3 = bitcast <4 x i16> %0 to x86_mmx
1176 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1177 %5 = bitcast x86_mmx %4 to <4 x i16>
1178 %6 = bitcast <4 x i16> %5 to <1 x i64>
1179 %7 = extractelement <1 x i64> %6, i32 0
; test10: exercises the llvm.x86.ssse3.psign.b intrinsic on two x86_mmx operands.
1183 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
1185 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1188 %0 = bitcast <1 x i64> %b to <8 x i8>
1189 %1 = bitcast <1 x i64> %a to <8 x i8>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1190 %2 = bitcast <8 x i8> %1 to x86_mmx
1191 %3 = bitcast <8 x i8> %0 to x86_mmx
1192 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <8 x i8> -> <1 x i64>; lane 0 is read as i64.
1193 %5 = bitcast x86_mmx %4 to <8 x i8>
1194 %6 = bitcast <8 x i8> %5 to <1 x i64>
1195 %7 = extractelement <1 x i64> %6, i32 0
; test9: exercises the llvm.x86.ssse3.pshuf.b intrinsic on two x86_mmx operands.
1199 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
1201 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1204 %0 = bitcast <1 x i64> %b to <8 x i8>
1205 %1 = bitcast <1 x i64> %a to <8 x i8>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1206 %2 = bitcast <8 x i8> %1 to x86_mmx
1207 %3 = bitcast <8 x i8> %0 to x86_mmx
1208 %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <8 x i8> -> <1 x i64>; lane 0 is read as i64.
1209 %5 = bitcast x86_mmx %4 to <8 x i8>
1210 %6 = bitcast <8 x i8> %5 to <1 x i64>
1211 %7 = extractelement <1 x i64> %6, i32 0
; test8: exercises the llvm.x86.ssse3.pmul.hr.sw intrinsic on two x86_mmx
; operands.
1215 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1217 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1220 %0 = bitcast <1 x i64> %b to <4 x i16>
1221 %1 = bitcast <1 x i64> %a to <4 x i16>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1222 %2 = bitcast <4 x i16> %1 to x86_mmx
1223 %3 = bitcast <4 x i16> %0 to x86_mmx
1224 %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1225 %5 = bitcast x86_mmx %4 to <4 x i16>
1226 %6 = bitcast <4 x i16> %5 to <1 x i64>
1227 %7 = extractelement <1 x i64> %6, i32 0
; test7: exercises the llvm.x86.ssse3.pmadd.ub.sw intrinsic on two x86_mmx
; operands.
1231 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <8 x i8> -> x86_mmx via bitcasts only.
1233 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1236 %0 = bitcast <1 x i64> %b to <8 x i8>
1237 %1 = bitcast <1 x i64> %a to <8 x i8>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1238 %2 = bitcast <8 x i8> %1 to x86_mmx
1239 %3 = bitcast <8 x i8> %0 to x86_mmx
1240 %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; NOTE(review): the result is relabelled <8 x i8> here. Because every step is
; a bitcast ending in <1 x i64>, the intermediate element type does not change
; the 64-bit value; confirm the label is only cosmetic for this test's purpose.
1241 %5 = bitcast x86_mmx %4 to <8 x i8>
1242 %6 = bitcast <8 x i8> %5 to <1 x i64>
1243 %7 = extractelement <1 x i64> %6, i32 0
; test6: exercises the llvm.x86.ssse3.phsub.sw intrinsic on two x86_mmx operands.
1247 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1249 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1252 %0 = bitcast <1 x i64> %b to <4 x i16>
1253 %1 = bitcast <1 x i64> %a to <4 x i16>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1254 %2 = bitcast <4 x i16> %1 to x86_mmx
1255 %3 = bitcast <4 x i16> %0 to x86_mmx
1256 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1257 %5 = bitcast x86_mmx %4 to <4 x i16>
1258 %6 = bitcast <4 x i16> %5 to <1 x i64>
1259 %7 = extractelement <1 x i64> %6, i32 0
; test5: exercises the llvm.x86.ssse3.phsub.d intrinsic on two x86_mmx operands.
1263 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <2 x i32> -> x86_mmx via bitcasts only.
1265 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1268 %0 = bitcast <1 x i64> %b to <2 x i32>
1269 %1 = bitcast <1 x i64> %a to <2 x i32>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1270 %2 = bitcast <2 x i32> %1 to x86_mmx
1271 %3 = bitcast <2 x i32> %0 to x86_mmx
1272 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1273 %5 = bitcast x86_mmx %4 to <2 x i32>
1274 %6 = bitcast <2 x i32> %5 to <1 x i64>
1275 %7 = extractelement <1 x i64> %6, i32 0
; test4: exercises the llvm.x86.ssse3.phsub.w intrinsic on two x86_mmx operands.
1279 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1281 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1284 %0 = bitcast <1 x i64> %b to <4 x i16>
1285 %1 = bitcast <1 x i64> %a to <4 x i16>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1286 %2 = bitcast <4 x i16> %1 to x86_mmx
1287 %3 = bitcast <4 x i16> %0 to x86_mmx
1288 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1289 %5 = bitcast x86_mmx %4 to <4 x i16>
1290 %6 = bitcast <4 x i16> %5 to <1 x i64>
1291 %7 = extractelement <1 x i64> %6, i32 0
; test3: exercises the llvm.x86.ssse3.phadd.sw intrinsic on two x86_mmx operands.
1295 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <4 x i16> -> x86_mmx via bitcasts only.
1297 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1300 %0 = bitcast <1 x i64> %b to <4 x i16>
1301 %1 = bitcast <1 x i64> %a to <4 x i16>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1302 %2 = bitcast <4 x i16> %1 to x86_mmx
1303 %3 = bitcast <4 x i16> %0 to x86_mmx
1304 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <4 x i16> -> <1 x i64>; lane 0 is read as i64.
1305 %5 = bitcast x86_mmx %4 to <4 x i16>
1306 %6 = bitcast <4 x i16> %5 to <1 x i64>
1307 %7 = extractelement <1 x i64> %6, i32 0
; test2: exercises the llvm.x86.ssse3.phadd.d intrinsic on two x86_mmx operands.
1311 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
; Arguments travel <1 x i64> -> <2 x i32> -> x86_mmx via bitcasts only.
1313 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1316 %0 = bitcast <1 x i64> %b to <2 x i32>
1317 %1 = bitcast <1 x i64> %a to <2 x i32>
; %2 derives from %a and %3 from %b, so the call below is (a, b).
1318 %2 = bitcast <2 x i32> %1 to x86_mmx
1319 %3 = bitcast <2 x i32> %0 to x86_mmx
1320 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Result goes back x86_mmx -> <2 x i32> -> <1 x i64>; lane 0 is read as i64.
1321 %5 = bitcast x86_mmx %4 to <2 x i32>
1322 %6 = bitcast <2 x i32> %5 to <1 x i64>
1323 %7 = extractelement <1 x i64> %6, i32 0