1 ; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
2 ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
3 ; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
4 ; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
; test1: lane-wise signed minimum of two <16 x i8> vectors, written as
; icmp slt + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed less-than %load.b and %load.b elsewhere, and stores through %ptr.a.
6 define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
10 vector.body: ; preds = %vector.body, %vector.ph
11 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
12 %gep.a = getelementptr inbounds i8* %a, i64 %index
13 %gep.b = getelementptr inbounds i8* %b, i64 %index
14 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
15 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
16 %load.a = load <16 x i8>* %ptr.a, align 2
17 %load.b = load <16 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; signed value.
18 %cmp = icmp slt <16 x i8> %load.a, %load.b
19 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
20 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
21 %index.next = add i64 %index, 16
22 %loop = icmp eq i64 %index.next, 16384
23 br i1 %loop, label %for.end, label %vector.body
25 for.end: ; preds = %vector.body
; test2: lane-wise signed minimum of two <16 x i8> vectors, written as
; icmp sle + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed less-or-equal to %load.b and %load.b elsewhere, and stores through
; %ptr.a.
38 define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
42 vector.body: ; preds = %vector.body, %vector.ph
43 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
44 %gep.a = getelementptr inbounds i8* %a, i64 %index
45 %gep.b = getelementptr inbounds i8* %b, i64 %index
46 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
47 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
48 %load.a = load <16 x i8>* %ptr.a, align 2
49 %load.b = load <16 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sle still yields the
; smaller signed value per lane.
50 %cmp = icmp sle <16 x i8> %load.a, %load.b
51 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
52 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
53 %index.next = add i64 %index, 16
54 %loop = icmp eq i64 %index.next, 16384
55 br i1 %loop, label %for.end, label %vector.body
57 for.end: ; preds = %vector.body
; test3: lane-wise signed maximum of two <16 x i8> vectors, written as
; icmp sgt + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed greater-than %load.b and %load.b elsewhere, and stores through
; %ptr.a.
70 define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
74 vector.body: ; preds = %vector.body, %vector.ph
75 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
76 %gep.a = getelementptr inbounds i8* %a, i64 %index
77 %gep.b = getelementptr inbounds i8* %b, i64 %index
78 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
79 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
80 %load.a = load <16 x i8>* %ptr.a, align 2
81 %load.b = load <16 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; signed value.
82 %cmp = icmp sgt <16 x i8> %load.a, %load.b
83 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
84 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
85 %index.next = add i64 %index, 16
86 %loop = icmp eq i64 %index.next, 16384
87 br i1 %loop, label %for.end, label %vector.body
89 for.end: ; preds = %vector.body
; test4: lane-wise signed maximum of two <16 x i8> vectors, written as
; icmp sge + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed greater-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a.
102 define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
104 br label %vector.body
106 vector.body: ; preds = %vector.body, %vector.ph
107 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
108 %gep.a = getelementptr inbounds i8* %a, i64 %index
109 %gep.b = getelementptr inbounds i8* %b, i64 %index
110 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
111 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
112 %load.a = load <16 x i8>* %ptr.a, align 2
113 %load.b = load <16 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sge still yields the
; larger signed value per lane.
114 %cmp = icmp sge <16 x i8> %load.a, %load.b
115 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
116 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
117 %index.next = add i64 %index, 16
118 %loop = icmp eq i64 %index.next, 16384
119 br i1 %loop, label %for.end, label %vector.body
121 for.end: ; preds = %vector.body
; test5: lane-wise unsigned minimum of two <16 x i8> vectors, written as
; icmp ult + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned less-than %load.b and %load.b elsewhere, and stores through
; %ptr.a.
134 define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
136 br label %vector.body
138 vector.body: ; preds = %vector.body, %vector.ph
139 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
140 %gep.a = getelementptr inbounds i8* %a, i64 %index
141 %gep.b = getelementptr inbounds i8* %b, i64 %index
142 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
143 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
144 %load.a = load <16 x i8>* %ptr.a, align 2
145 %load.b = load <16 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; unsigned value.
146 %cmp = icmp ult <16 x i8> %load.a, %load.b
147 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
148 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
149 %index.next = add i64 %index, 16
150 %loop = icmp eq i64 %index.next, 16384
151 br i1 %loop, label %for.end, label %vector.body
153 for.end: ; preds = %vector.body
; test6: lane-wise unsigned minimum of two <16 x i8> vectors, written as
; icmp ule + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned less-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a.
166 define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
168 br label %vector.body
170 vector.body: ; preds = %vector.body, %vector.ph
171 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
172 %gep.a = getelementptr inbounds i8* %a, i64 %index
173 %gep.b = getelementptr inbounds i8* %b, i64 %index
174 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
175 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
176 %load.a = load <16 x i8>* %ptr.a, align 2
177 %load.b = load <16 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so ule still yields the
; smaller unsigned value per lane.
178 %cmp = icmp ule <16 x i8> %load.a, %load.b
179 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
180 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
181 %index.next = add i64 %index, 16
182 %loop = icmp eq i64 %index.next, 16384
183 br i1 %loop, label %for.end, label %vector.body
185 for.end: ; preds = %vector.body
; test7: lane-wise unsigned maximum of two <16 x i8> vectors, written as
; icmp ugt + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned greater-than %load.b and %load.b elsewhere, and stores through
; %ptr.a.
198 define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
200 br label %vector.body
202 vector.body: ; preds = %vector.body, %vector.ph
203 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
204 %gep.a = getelementptr inbounds i8* %a, i64 %index
205 %gep.b = getelementptr inbounds i8* %b, i64 %index
206 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
207 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
208 %load.a = load <16 x i8>* %ptr.a, align 2
209 %load.b = load <16 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; unsigned value.
210 %cmp = icmp ugt <16 x i8> %load.a, %load.b
211 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
212 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
213 %index.next = add i64 %index, 16
214 %loop = icmp eq i64 %index.next, 16384
215 br i1 %loop, label %for.end, label %vector.body
217 for.end: ; preds = %vector.body
; test8: lane-wise unsigned maximum of two <16 x i8> vectors, written as
; icmp uge + select. Each pass loads 16 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned greater-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a.
230 define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
232 br label %vector.body
234 vector.body: ; preds = %vector.body, %vector.ph
235 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
236 %gep.a = getelementptr inbounds i8* %a, i64 %index
237 %gep.b = getelementptr inbounds i8* %b, i64 %index
238 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
239 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
240 %load.a = load <16 x i8>* %ptr.a, align 2
241 %load.b = load <16 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so uge still yields the
; larger unsigned value per lane.
242 %cmp = icmp uge <16 x i8> %load.a, %load.b
243 %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
244 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
245 %index.next = add i64 %index, 16
246 %loop = icmp eq i64 %index.next, 16384
247 br i1 %loop, label %for.end, label %vector.body
249 for.end: ; preds = %vector.body
; test9: lane-wise signed minimum of two <8 x i16> vectors, written as
; icmp slt + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-than %load.b and %load.b elsewhere, and stores
; through %ptr.a.
262 define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
264 br label %vector.body
266 vector.body: ; preds = %vector.body, %vector.ph
267 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
268 %gep.a = getelementptr inbounds i16* %a, i64 %index
269 %gep.b = getelementptr inbounds i16* %b, i64 %index
270 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
271 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
272 %load.a = load <8 x i16>* %ptr.a, align 2
273 %load.b = load <8 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; signed value.
274 %cmp = icmp slt <8 x i16> %load.a, %load.b
275 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
276 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
277 %index.next = add i64 %index, 8
278 %loop = icmp eq i64 %index.next, 16384
279 br i1 %loop, label %for.end, label %vector.body
281 for.end: ; preds = %vector.body
; test10: lane-wise signed minimum of two <8 x i16> vectors, written as
; icmp sle + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-or-equal to %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE2, AVX1 and
; AVX2 check prefixes to this function.
294 define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
296 br label %vector.body
298 vector.body: ; preds = %vector.body, %vector.ph
299 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
300 %gep.a = getelementptr inbounds i16* %a, i64 %index
301 %gep.b = getelementptr inbounds i16* %b, i64 %index
302 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
303 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
304 %load.a = load <8 x i16>* %ptr.a, align 2
305 %load.b = load <8 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sle still yields the
; smaller signed value per lane.
306 %cmp = icmp sle <8 x i16> %load.a, %load.b
307 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
308 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
309 %index.next = add i64 %index, 8
310 %loop = icmp eq i64 %index.next, 16384
311 br i1 %loop, label %for.end, label %vector.body
313 for.end: ; preds = %vector.body
316 ; SSE2-LABEL: test10:
319 ; AVX1-LABEL: test10:
322 ; AVX2-LABEL: test10:
; test11: lane-wise signed maximum of two <8 x i16> vectors, written as
; icmp sgt + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE2, AVX1 and
; AVX2 check prefixes to this function.
326 define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
328 br label %vector.body
330 vector.body: ; preds = %vector.body, %vector.ph
331 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
332 %gep.a = getelementptr inbounds i16* %a, i64 %index
333 %gep.b = getelementptr inbounds i16* %b, i64 %index
334 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
335 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
336 %load.a = load <8 x i16>* %ptr.a, align 2
337 %load.b = load <8 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; signed value.
338 %cmp = icmp sgt <8 x i16> %load.a, %load.b
339 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
340 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
341 %index.next = add i64 %index, 8
342 %loop = icmp eq i64 %index.next, 16384
343 br i1 %loop, label %for.end, label %vector.body
345 for.end: ; preds = %vector.body
348 ; SSE2-LABEL: test11:
351 ; AVX1-LABEL: test11:
354 ; AVX2-LABEL: test11:
; test12: lane-wise signed maximum of two <8 x i16> vectors, written as
; icmp sge + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-or-equal to %load.b and %load.b elsewhere,
; and stores through %ptr.a. The label lines after the loop tie the SSE2, AVX1
; and AVX2 check prefixes to this function.
358 define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
360 br label %vector.body
362 vector.body: ; preds = %vector.body, %vector.ph
363 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
364 %gep.a = getelementptr inbounds i16* %a, i64 %index
365 %gep.b = getelementptr inbounds i16* %b, i64 %index
366 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
367 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
368 %load.a = load <8 x i16>* %ptr.a, align 2
369 %load.b = load <8 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sge still yields the
; larger signed value per lane.
370 %cmp = icmp sge <8 x i16> %load.a, %load.b
371 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
372 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
373 %index.next = add i64 %index, 8
374 %loop = icmp eq i64 %index.next, 16384
375 br i1 %loop, label %for.end, label %vector.body
377 for.end: ; preds = %vector.body
380 ; SSE2-LABEL: test12:
383 ; AVX1-LABEL: test12:
386 ; AVX2-LABEL: test12:
; test13: lane-wise unsigned minimum of two <8 x i16> vectors, written as
; icmp ult + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned less-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
390 define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
392 br label %vector.body
394 vector.body: ; preds = %vector.body, %vector.ph
395 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
396 %gep.a = getelementptr inbounds i16* %a, i64 %index
397 %gep.b = getelementptr inbounds i16* %b, i64 %index
398 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
399 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
400 %load.a = load <8 x i16>* %ptr.a, align 2
401 %load.b = load <8 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; unsigned value.
402 %cmp = icmp ult <8 x i16> %load.a, %load.b
403 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
404 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
405 %index.next = add i64 %index, 8
406 %loop = icmp eq i64 %index.next, 16384
407 br i1 %loop, label %for.end, label %vector.body
409 for.end: ; preds = %vector.body
412 ; SSE4-LABEL: test13:
415 ; AVX1-LABEL: test13:
418 ; AVX2-LABEL: test13:
; test14: lane-wise unsigned minimum of two <8 x i16> vectors, written as
; icmp ule + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned less-or-equal to %load.b and %load.b elsewhere,
; and stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1
; and AVX2 check prefixes to this function.
422 define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
424 br label %vector.body
426 vector.body: ; preds = %vector.body, %vector.ph
427 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
428 %gep.a = getelementptr inbounds i16* %a, i64 %index
429 %gep.b = getelementptr inbounds i16* %b, i64 %index
430 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
431 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
432 %load.a = load <8 x i16>* %ptr.a, align 2
433 %load.b = load <8 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so ule still yields the
; smaller unsigned value per lane.
434 %cmp = icmp ule <8 x i16> %load.a, %load.b
435 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
436 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
437 %index.next = add i64 %index, 8
438 %loop = icmp eq i64 %index.next, 16384
439 br i1 %loop, label %for.end, label %vector.body
441 for.end: ; preds = %vector.body
444 ; SSE4-LABEL: test14:
447 ; AVX1-LABEL: test14:
450 ; AVX2-LABEL: test14:
; test15: lane-wise unsigned maximum of two <8 x i16> vectors, written as
; icmp ugt + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned greater-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
454 define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
456 br label %vector.body
458 vector.body: ; preds = %vector.body, %vector.ph
459 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
460 %gep.a = getelementptr inbounds i16* %a, i64 %index
461 %gep.b = getelementptr inbounds i16* %b, i64 %index
462 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
463 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
464 %load.a = load <8 x i16>* %ptr.a, align 2
465 %load.b = load <8 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; unsigned value.
466 %cmp = icmp ugt <8 x i16> %load.a, %load.b
467 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
468 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
469 %index.next = add i64 %index, 8
470 %loop = icmp eq i64 %index.next, 16384
471 br i1 %loop, label %for.end, label %vector.body
473 for.end: ; preds = %vector.body
476 ; SSE4-LABEL: test15:
479 ; AVX1-LABEL: test15:
482 ; AVX2-LABEL: test15:
; test16: lane-wise unsigned maximum of two <8 x i16> vectors, written as
; icmp uge + select. Each pass loads 8 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned greater-or-equal to %load.b and %load.b
; elsewhere, and stores through %ptr.a. The label lines after the loop tie the
; SSE4, AVX1 and AVX2 check prefixes to this function.
486 define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
488 br label %vector.body
490 vector.body: ; preds = %vector.body, %vector.ph
491 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
492 %gep.a = getelementptr inbounds i16* %a, i64 %index
493 %gep.b = getelementptr inbounds i16* %b, i64 %index
494 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
495 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
496 %load.a = load <8 x i16>* %ptr.a, align 2
497 %load.b = load <8 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so uge still yields the
; larger unsigned value per lane.
498 %cmp = icmp uge <8 x i16> %load.a, %load.b
499 %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
500 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once %index.next equals 16384.
501 %index.next = add i64 %index, 8
502 %loop = icmp eq i64 %index.next, 16384
503 br i1 %loop, label %for.end, label %vector.body
505 for.end: ; preds = %vector.body
508 ; SSE4-LABEL: test16:
511 ; AVX1-LABEL: test16:
514 ; AVX2-LABEL: test16:
; test17: lane-wise signed minimum of two <4 x i32> vectors, written as
; icmp slt + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-than %load.b and %load.b elsewhere, and stores
; through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and AVX2
; check prefixes to this function.
518 define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
520 br label %vector.body
522 vector.body: ; preds = %vector.body, %vector.ph
523 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
524 %gep.a = getelementptr inbounds i32* %a, i64 %index
525 %gep.b = getelementptr inbounds i32* %b, i64 %index
526 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
527 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
528 %load.a = load <4 x i32>* %ptr.a, align 2
529 %load.b = load <4 x i32>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; signed value.
530 %cmp = icmp slt <4 x i32> %load.a, %load.b
531 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
532 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
533 %index.next = add i64 %index, 4
534 %loop = icmp eq i64 %index.next, 16384
535 br i1 %loop, label %for.end, label %vector.body
537 for.end: ; preds = %vector.body
540 ; SSE4-LABEL: test17:
543 ; AVX1-LABEL: test17:
546 ; AVX2-LABEL: test17:
; test18: lane-wise signed minimum of two <4 x i32> vectors, written as
; icmp sle + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-or-equal to %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
550 define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
552 br label %vector.body
554 vector.body: ; preds = %vector.body, %vector.ph
555 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
556 %gep.a = getelementptr inbounds i32* %a, i64 %index
557 %gep.b = getelementptr inbounds i32* %b, i64 %index
558 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
559 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
560 %load.a = load <4 x i32>* %ptr.a, align 2
561 %load.b = load <4 x i32>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sle still yields the
; smaller signed value per lane.
562 %cmp = icmp sle <4 x i32> %load.a, %load.b
563 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
564 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
565 %index.next = add i64 %index, 4
566 %loop = icmp eq i64 %index.next, 16384
567 br i1 %loop, label %for.end, label %vector.body
569 for.end: ; preds = %vector.body
572 ; SSE4-LABEL: test18:
575 ; AVX1-LABEL: test18:
578 ; AVX2-LABEL: test18:
; test19: lane-wise signed maximum of two <4 x i32> vectors, written as
; icmp sgt + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
582 define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
584 br label %vector.body
586 vector.body: ; preds = %vector.body, %vector.ph
587 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
588 %gep.a = getelementptr inbounds i32* %a, i64 %index
589 %gep.b = getelementptr inbounds i32* %b, i64 %index
590 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
591 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
592 %load.a = load <4 x i32>* %ptr.a, align 2
593 %load.b = load <4 x i32>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; signed value.
594 %cmp = icmp sgt <4 x i32> %load.a, %load.b
595 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
596 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
597 %index.next = add i64 %index, 4
598 %loop = icmp eq i64 %index.next, 16384
599 br i1 %loop, label %for.end, label %vector.body
601 for.end: ; preds = %vector.body
604 ; SSE4-LABEL: test19:
607 ; AVX1-LABEL: test19:
610 ; AVX2-LABEL: test19:
; test20: lane-wise signed maximum of two <4 x i32> vectors, written as
; icmp sge + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-or-equal to %load.b and %load.b elsewhere,
; and stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1
; and AVX2 check prefixes to this function.
614 define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
616 br label %vector.body
618 vector.body: ; preds = %vector.body, %vector.ph
619 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
620 %gep.a = getelementptr inbounds i32* %a, i64 %index
621 %gep.b = getelementptr inbounds i32* %b, i64 %index
622 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
623 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
624 %load.a = load <4 x i32>* %ptr.a, align 2
625 %load.b = load <4 x i32>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sge still yields the
; larger signed value per lane.
626 %cmp = icmp sge <4 x i32> %load.a, %load.b
627 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
628 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
629 %index.next = add i64 %index, 4
630 %loop = icmp eq i64 %index.next, 16384
631 br i1 %loop, label %for.end, label %vector.body
633 for.end: ; preds = %vector.body
636 ; SSE4-LABEL: test20:
639 ; AVX1-LABEL: test20:
642 ; AVX2-LABEL: test20:
; test21: lane-wise unsigned minimum of two <4 x i32> vectors, written as
; icmp ult + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned less-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
646 define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
648 br label %vector.body
650 vector.body: ; preds = %vector.body, %vector.ph
651 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
652 %gep.a = getelementptr inbounds i32* %a, i64 %index
653 %gep.b = getelementptr inbounds i32* %b, i64 %index
654 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
655 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
656 %load.a = load <4 x i32>* %ptr.a, align 2
657 %load.b = load <4 x i32>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; unsigned value.
658 %cmp = icmp ult <4 x i32> %load.a, %load.b
659 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
660 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
661 %index.next = add i64 %index, 4
662 %loop = icmp eq i64 %index.next, 16384
663 br i1 %loop, label %for.end, label %vector.body
665 for.end: ; preds = %vector.body
668 ; SSE4-LABEL: test21:
671 ; AVX1-LABEL: test21:
674 ; AVX2-LABEL: test21:
; test22: lane-wise unsigned minimum of two <4 x i32> vectors, written as
; icmp ule + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned less-or-equal to %load.b and %load.b elsewhere,
; and stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1
; and AVX2 check prefixes to this function.
678 define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
680 br label %vector.body
682 vector.body: ; preds = %vector.body, %vector.ph
683 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
684 %gep.a = getelementptr inbounds i32* %a, i64 %index
685 %gep.b = getelementptr inbounds i32* %b, i64 %index
686 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
687 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
688 %load.a = load <4 x i32>* %ptr.a, align 2
689 %load.b = load <4 x i32>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so ule still yields the
; smaller unsigned value per lane.
690 %cmp = icmp ule <4 x i32> %load.a, %load.b
691 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
692 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
693 %index.next = add i64 %index, 4
694 %loop = icmp eq i64 %index.next, 16384
695 br i1 %loop, label %for.end, label %vector.body
697 for.end: ; preds = %vector.body
700 ; SSE4-LABEL: test22:
703 ; AVX1-LABEL: test22:
706 ; AVX2-LABEL: test22:
; test23: lane-wise unsigned maximum of two <4 x i32> vectors, written as
; icmp ugt + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned greater-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label lines after the loop tie the SSE4, AVX1 and
; AVX2 check prefixes to this function.
710 define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
712 br label %vector.body
714 vector.body: ; preds = %vector.body, %vector.ph
715 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
716 %gep.a = getelementptr inbounds i32* %a, i64 %index
717 %gep.b = getelementptr inbounds i32* %b, i64 %index
718 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
719 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
720 %load.a = load <4 x i32>* %ptr.a, align 2
721 %load.b = load <4 x i32>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; unsigned value.
722 %cmp = icmp ugt <4 x i32> %load.a, %load.b
723 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
724 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
725 %index.next = add i64 %index, 4
726 %loop = icmp eq i64 %index.next, 16384
727 br i1 %loop, label %for.end, label %vector.body
729 for.end: ; preds = %vector.body
732 ; SSE4-LABEL: test23:
735 ; AVX1-LABEL: test23:
738 ; AVX2-LABEL: test23:
; test24: lane-wise unsigned maximum of two <4 x i32> vectors, written as
; icmp uge + select. Each pass loads 4 i32 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is unsigned greater-or-equal to %load.b and %load.b
; elsewhere, and stores through %ptr.a. The label lines after the loop tie the
; SSE4, AVX1 and AVX2 check prefixes to this function.
742 define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
744 br label %vector.body
746 vector.body: ; preds = %vector.body, %vector.ph
747 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
748 %gep.a = getelementptr inbounds i32* %a, i64 %index
749 %gep.b = getelementptr inbounds i32* %b, i64 %index
750 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
751 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
752 %load.a = load <4 x i32>* %ptr.a, align 2
753 %load.b = load <4 x i32>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so uge still yields the
; larger unsigned value per lane.
754 %cmp = icmp uge <4 x i32> %load.a, %load.b
755 %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
756 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once %index.next equals 16384.
757 %index.next = add i64 %index, 4
758 %loop = icmp eq i64 %index.next, 16384
759 br i1 %loop, label %for.end, label %vector.body
761 for.end: ; preds = %vector.body
764 ; SSE4-LABEL: test24:
767 ; AVX1-LABEL: test24:
770 ; AVX2-LABEL: test24:
; test25: lane-wise signed minimum of two <32 x i8> vectors, written as
; icmp slt + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed less-than %load.b and %load.b elsewhere, and stores through
; %ptr.a. The label line after the loop ties the AVX2 check prefix to this
; function.
774 define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
776 br label %vector.body
778 vector.body: ; preds = %vector.body, %vector.ph
779 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
780 %gep.a = getelementptr inbounds i8* %a, i64 %index
781 %gep.b = getelementptr inbounds i8* %b, i64 %index
782 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
783 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
784 %load.a = load <32 x i8>* %ptr.a, align 2
785 %load.b = load <32 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; signed value.
786 %cmp = icmp slt <32 x i8> %load.a, %load.b
787 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
788 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
789 %index.next = add i64 %index, 32
790 %loop = icmp eq i64 %index.next, 16384
791 br i1 %loop, label %for.end, label %vector.body
793 for.end: ; preds = %vector.body
796 ; AVX2-LABEL: test25:
; test26: lane-wise signed minimum of two <32 x i8> vectors, written as
; icmp sle + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed less-or-equal to %load.b and %load.b elsewhere, and stores through
; %ptr.a. The label line after the loop ties the AVX2 check prefix to this
; function.
800 define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
802 br label %vector.body
804 vector.body: ; preds = %vector.body, %vector.ph
805 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
806 %gep.a = getelementptr inbounds i8* %a, i64 %index
807 %gep.b = getelementptr inbounds i8* %b, i64 %index
808 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
809 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
810 %load.a = load <32 x i8>* %ptr.a, align 2
811 %load.b = load <32 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sle still yields the
; smaller signed value per lane.
812 %cmp = icmp sle <32 x i8> %load.a, %load.b
813 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
814 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
815 %index.next = add i64 %index, 32
816 %loop = icmp eq i64 %index.next, 16384
817 br i1 %loop, label %for.end, label %vector.body
819 for.end: ; preds = %vector.body
822 ; AVX2-LABEL: test26:
; test27: lane-wise signed maximum of two <32 x i8> vectors, written as
; icmp sgt + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed greater-than %load.b and %load.b elsewhere, and stores through
; %ptr.a. The label line after the loop ties the AVX2 check prefix to this
; function.
826 define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
828 br label %vector.body
830 vector.body: ; preds = %vector.body, %vector.ph
831 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
832 %gep.a = getelementptr inbounds i8* %a, i64 %index
833 %gep.b = getelementptr inbounds i8* %b, i64 %index
834 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
835 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
836 %load.a = load <32 x i8>* %ptr.a, align 2
837 %load.b = load <32 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; signed value.
838 %cmp = icmp sgt <32 x i8> %load.a, %load.b
839 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
840 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
841 %index.next = add i64 %index, 32
842 %loop = icmp eq i64 %index.next, 16384
843 br i1 %loop, label %for.end, label %vector.body
845 for.end: ; preds = %vector.body
848 ; AVX2-LABEL: test27:
; test28: lane-wise signed maximum of two <32 x i8> vectors, written as
; icmp sge + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is signed greater-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a. The label line after the loop ties the AVX2 check prefix to
; this function.
852 define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
854 br label %vector.body
856 vector.body: ; preds = %vector.body, %vector.ph
857 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
858 %gep.a = getelementptr inbounds i8* %a, i64 %index
859 %gep.b = getelementptr inbounds i8* %b, i64 %index
860 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
861 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
862 %load.a = load <32 x i8>* %ptr.a, align 2
863 %load.b = load <32 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sge still yields the
; larger signed value per lane.
864 %cmp = icmp sge <32 x i8> %load.a, %load.b
865 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
866 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
867 %index.next = add i64 %index, 32
868 %loop = icmp eq i64 %index.next, 16384
869 br i1 %loop, label %for.end, label %vector.body
871 for.end: ; preds = %vector.body
874 ; AVX2-LABEL: test28:
; test29: lane-wise unsigned minimum of two <32 x i8> vectors, written as
; icmp ult + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned less-than %load.b and %load.b elsewhere, and stores through
; %ptr.a. The label line after the loop ties the AVX2 check prefix to this
; function.
878 define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
880 br label %vector.body
882 vector.body: ; preds = %vector.body, %vector.ph
883 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
884 %gep.a = getelementptr inbounds i8* %a, i64 %index
885 %gep.b = getelementptr inbounds i8* %b, i64 %index
886 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
887 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
888 %load.a = load <32 x i8>* %ptr.a, align 2
889 %load.b = load <32 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; unsigned value.
890 %cmp = icmp ult <32 x i8> %load.a, %load.b
891 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
892 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
893 %index.next = add i64 %index, 32
894 %loop = icmp eq i64 %index.next, 16384
895 br i1 %loop, label %for.end, label %vector.body
897 for.end: ; preds = %vector.body
900 ; AVX2-LABEL: test29:
; test30: lane-wise unsigned minimum of two <32 x i8> vectors, written as
; icmp ule + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned less-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a. The label line after the loop ties the AVX2 check prefix to
; this function.
904 define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
906 br label %vector.body
908 vector.body: ; preds = %vector.body, %vector.ph
909 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
910 %gep.a = getelementptr inbounds i8* %a, i64 %index
911 %gep.b = getelementptr inbounds i8* %b, i64 %index
912 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
913 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
914 %load.a = load <32 x i8>* %ptr.a, align 2
915 %load.b = load <32 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so ule still yields the
; smaller unsigned value per lane.
916 %cmp = icmp ule <32 x i8> %load.a, %load.b
917 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
918 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
919 %index.next = add i64 %index, 32
920 %loop = icmp eq i64 %index.next, 16384
921 br i1 %loop, label %for.end, label %vector.body
923 for.end: ; preds = %vector.body
926 ; AVX2-LABEL: test30:
; test31: lane-wise unsigned maximum of two <32 x i8> vectors, written as
; icmp ugt + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned greater-than %load.b and %load.b elsewhere, and stores through
; %ptr.a. The label line after the loop ties the AVX2 check prefix to this
; function.
930 define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
932 br label %vector.body
934 vector.body: ; preds = %vector.body, %vector.ph
935 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
936 %gep.a = getelementptr inbounds i8* %a, i64 %index
937 %gep.b = getelementptr inbounds i8* %b, i64 %index
938 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
939 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
940 %load.a = load <32 x i8>* %ptr.a, align 2
941 %load.b = load <32 x i8>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; unsigned value.
942 %cmp = icmp ugt <32 x i8> %load.a, %load.b
943 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
944 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
945 %index.next = add i64 %index, 32
946 %loop = icmp eq i64 %index.next, 16384
947 br i1 %loop, label %for.end, label %vector.body
949 for.end: ; preds = %vector.body
952 ; AVX2-LABEL: test31:
; test32: lane-wise unsigned maximum of two <32 x i8> vectors, written as
; icmp uge + select. Each pass loads 32 bytes from %a and %b at offset %index
; (loads and the store are declared align 2), keeps %load.a in lanes where it
; is unsigned greater-or-equal to %load.b and %load.b elsewhere, and stores
; through %ptr.a. The label line after the loop ties the AVX2 check prefix to
; this function.
956 define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
958 br label %vector.body
960 vector.body: ; preds = %vector.body, %vector.ph
961 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
962 %gep.a = getelementptr inbounds i8* %a, i64 %index
963 %gep.b = getelementptr inbounds i8* %b, i64 %index
964 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
965 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
966 %load.a = load <32 x i8>* %ptr.a, align 2
967 %load.b = load <32 x i8>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so uge still yields the
; larger unsigned value per lane.
968 %cmp = icmp uge <32 x i8> %load.a, %load.b
969 %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
970 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once %index.next equals 16384.
971 %index.next = add i64 %index, 32
972 %loop = icmp eq i64 %index.next, 16384
973 br i1 %loop, label %for.end, label %vector.body
975 for.end: ; preds = %vector.body
978 ; AVX2-LABEL: test32:
; test33: lane-wise signed minimum of two <16 x i16> vectors, written as
; icmp slt + select. Each pass loads 16 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-than %load.b and %load.b elsewhere, and stores
; through %ptr.a. The label line after the loop ties the AVX2 check prefix to
; this function.
982 define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
984 br label %vector.body
986 vector.body: ; preds = %vector.body, %vector.ph
987 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
988 %gep.a = getelementptr inbounds i16* %a, i64 %index
989 %gep.b = getelementptr inbounds i16* %b, i64 %index
990 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
991 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
992 %load.a = load <16 x i16>* %ptr.a, align 2
993 %load.b = load <16 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the smaller
; signed value.
994 %cmp = icmp slt <16 x i16> %load.a, %load.b
995 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
996 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
997 %index.next = add i64 %index, 16
998 %loop = icmp eq i64 %index.next, 16384
999 br i1 %loop, label %for.end, label %vector.body
1001 for.end: ; preds = %vector.body
1004 ; AVX2-LABEL: test33:
; test34: lane-wise signed minimum of two <16 x i16> vectors, written as
; icmp sle + select. Each pass loads 16 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed less-or-equal to %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label line after the loop ties the AVX2 check
; prefix to this function.
1008 define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
1010 br label %vector.body
1012 vector.body: ; preds = %vector.body, %vector.ph
1013 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1014 %gep.a = getelementptr inbounds i16* %a, i64 %index
1015 %gep.b = getelementptr inbounds i16* %b, i64 %index
1016 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1017 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1018 %load.a = load <16 x i16>* %ptr.a, align 2
1019 %load.b = load <16 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sle still yields the
; smaller signed value per lane.
1020 %cmp = icmp sle <16 x i16> %load.a, %load.b
1021 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1022 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
1023 %index.next = add i64 %index, 16
1024 %loop = icmp eq i64 %index.next, 16384
1025 br i1 %loop, label %for.end, label %vector.body
1027 for.end: ; preds = %vector.body
1030 ; AVX2-LABEL: test34:
; test35: lane-wise signed maximum of two <16 x i16> vectors, written as
; icmp sgt + select. Each pass loads 16 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-than %load.b and %load.b elsewhere, and
; stores through %ptr.a. The label line after the loop ties the AVX2 check
; prefix to this function.
1034 define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
1036 br label %vector.body
1038 vector.body: ; preds = %vector.body, %vector.ph
1039 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1040 %gep.a = getelementptr inbounds i16* %a, i64 %index
1041 %gep.b = getelementptr inbounds i16* %b, i64 %index
1042 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1043 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1044 %load.a = load <16 x i16>* %ptr.a, align 2
1045 %load.b = load <16 x i16>* %ptr.b, align 2
; Compare and select use the same two operands, so each lane gets the larger
; signed value.
1046 %cmp = icmp sgt <16 x i16> %load.a, %load.b
1047 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1048 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
1049 %index.next = add i64 %index, 16
1050 %loop = icmp eq i64 %index.next, 16384
1051 br i1 %loop, label %for.end, label %vector.body
1053 for.end: ; preds = %vector.body
1056 ; AVX2-LABEL: test35:
; test36: lane-wise signed maximum of two <16 x i16> vectors, written as
; icmp sge + select. Each pass loads 16 i16 elements from %a and %b at element
; offset %index (loads and the store are declared align 2), keeps %load.a in
; lanes where it is signed greater-or-equal to %load.b and %load.b elsewhere,
; and stores through %ptr.a. The label line after the loop ties the AVX2 check
; prefix to this function.
1060 define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
1062 br label %vector.body
1064 vector.body: ; preds = %vector.body, %vector.ph
1065 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1066 %gep.a = getelementptr inbounds i16* %a, i64 %index
1067 %gep.b = getelementptr inbounds i16* %b, i64 %index
1068 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1069 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1070 %load.a = load <16 x i16>* %ptr.a, align 2
1071 %load.b = load <16 x i16>* %ptr.b, align 2
; On equal lanes both operands hold the same value, so sge still yields the
; larger signed value per lane.
1072 %cmp = icmp sge <16 x i16> %load.a, %load.b
1073 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1074 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Step by one vector (16 elements); exit once %index.next equals 16384.
1075 %index.next = add i64 %index, 16
1076 %loop = icmp eq i64 %index.next, 16384
1077 br i1 %loop, label %for.end, label %vector.body
1079 for.end: ; preds = %vector.body
1082 ; AVX2-LABEL: test36:
; test37: <16 x i16> unsigned min written as icmp ult + select(%load.a, %load.b).
; The loop covers 16384 i16 elements in steps of 16 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1086 define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
1088 br label %vector.body
1090 vector.body: ; preds = %vector.body, %vector.ph
1091 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1092 %gep.a = getelementptr inbounds i16* %a, i64 %index
1093 %gep.b = getelementptr inbounds i16* %b, i64 %index
1094 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1095 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1096 %load.a = load <16 x i16>* %ptr.a, align 2
1097 %load.b = load <16 x i16>* %ptr.b, align 2
1098 %cmp = icmp ult <16 x i16> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned minimum per lane.
1099 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1100 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
1101 %index.next = add i64 %index, 16
1102 %loop = icmp eq i64 %index.next, 16384
1103 br i1 %loop, label %for.end, label %vector.body
1105 for.end: ; preds = %vector.body
1108 ; AVX2-LABEL: test37:
; test38: <16 x i16> unsigned min written as icmp ule + select(%load.a, %load.b).
; The loop covers 16384 i16 elements in steps of 16 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1112 define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
1114 br label %vector.body
1116 vector.body: ; preds = %vector.body, %vector.ph
1117 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1118 %gep.a = getelementptr inbounds i16* %a, i64 %index
1119 %gep.b = getelementptr inbounds i16* %b, i64 %index
1120 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1121 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1122 %load.a = load <16 x i16>* %ptr.a, align 2
1123 %load.b = load <16 x i16>* %ptr.b, align 2
1124 %cmp = icmp ule <16 x i16> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned minimum per lane.
1125 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1126 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
1127 %index.next = add i64 %index, 16
1128 %loop = icmp eq i64 %index.next, 16384
1129 br i1 %loop, label %for.end, label %vector.body
1131 for.end: ; preds = %vector.body
1134 ; AVX2-LABEL: test38:
; test39: <16 x i16> unsigned max written as icmp ugt + select(%load.a, %load.b).
; The loop covers 16384 i16 elements in steps of 16 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1138 define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
1140 br label %vector.body
1142 vector.body: ; preds = %vector.body, %vector.ph
1143 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1144 %gep.a = getelementptr inbounds i16* %a, i64 %index
1145 %gep.b = getelementptr inbounds i16* %b, i64 %index
1146 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1147 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1148 %load.a = load <16 x i16>* %ptr.a, align 2
1149 %load.b = load <16 x i16>* %ptr.b, align 2
1150 %cmp = icmp ugt <16 x i16> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned maximum per lane.
1151 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1152 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
1153 %index.next = add i64 %index, 16
1154 %loop = icmp eq i64 %index.next, 16384
1155 br i1 %loop, label %for.end, label %vector.body
1157 for.end: ; preds = %vector.body
1160 ; AVX2-LABEL: test39:
; test40: <16 x i16> unsigned max written as icmp uge + select(%load.a, %load.b).
; The loop covers 16384 i16 elements in steps of 16 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1164 define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
1166 br label %vector.body
1168 vector.body: ; preds = %vector.body, %vector.ph
1169 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1170 %gep.a = getelementptr inbounds i16* %a, i64 %index
1171 %gep.b = getelementptr inbounds i16* %b, i64 %index
1172 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
1173 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
1174 %load.a = load <16 x i16>* %ptr.a, align 2
1175 %load.b = load <16 x i16>* %ptr.b, align 2
1176 %cmp = icmp uge <16 x i16> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned maximum per lane.
1177 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
1178 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
1179 %index.next = add i64 %index, 16
1180 %loop = icmp eq i64 %index.next, 16384
1181 br i1 %loop, label %for.end, label %vector.body
1183 for.end: ; preds = %vector.body
1186 ; AVX2-LABEL: test40:
; test41: <8 x i32> signed min written as icmp slt + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1190 define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
1192 br label %vector.body
1194 vector.body: ; preds = %vector.body, %vector.ph
1195 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1196 %gep.a = getelementptr inbounds i32* %a, i64 %index
1197 %gep.b = getelementptr inbounds i32* %b, i64 %index
1198 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1199 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1200 %load.a = load <8 x i32>* %ptr.a, align 2
1201 %load.b = load <8 x i32>* %ptr.b, align 2
1202 %cmp = icmp slt <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the signed minimum per lane.
1203 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1204 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1205 %index.next = add i64 %index, 8
1206 %loop = icmp eq i64 %index.next, 16384
1207 br i1 %loop, label %for.end, label %vector.body
1209 for.end: ; preds = %vector.body
1212 ; AVX2-LABEL: test41:
; test42: <8 x i32> signed min written as icmp sle + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1216 define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
1218 br label %vector.body
1220 vector.body: ; preds = %vector.body, %vector.ph
1221 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1222 %gep.a = getelementptr inbounds i32* %a, i64 %index
1223 %gep.b = getelementptr inbounds i32* %b, i64 %index
1224 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1225 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1226 %load.a = load <8 x i32>* %ptr.a, align 2
1227 %load.b = load <8 x i32>* %ptr.b, align 2
1228 %cmp = icmp sle <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the signed minimum per lane.
1229 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1230 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1231 %index.next = add i64 %index, 8
1232 %loop = icmp eq i64 %index.next, 16384
1233 br i1 %loop, label %for.end, label %vector.body
1235 for.end: ; preds = %vector.body
1238 ; AVX2-LABEL: test42:
; test43: <8 x i32> signed max written as icmp sgt + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1242 define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
1244 br label %vector.body
1246 vector.body: ; preds = %vector.body, %vector.ph
1247 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1248 %gep.a = getelementptr inbounds i32* %a, i64 %index
1249 %gep.b = getelementptr inbounds i32* %b, i64 %index
1250 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1251 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1252 %load.a = load <8 x i32>* %ptr.a, align 2
1253 %load.b = load <8 x i32>* %ptr.b, align 2
1254 %cmp = icmp sgt <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the signed maximum per lane.
1255 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1256 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1257 %index.next = add i64 %index, 8
1258 %loop = icmp eq i64 %index.next, 16384
1259 br i1 %loop, label %for.end, label %vector.body
1261 for.end: ; preds = %vector.body
1264 ; AVX2-LABEL: test43:
; test44: <8 x i32> signed max written as icmp sge + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1268 define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
1270 br label %vector.body
1272 vector.body: ; preds = %vector.body, %vector.ph
1273 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1274 %gep.a = getelementptr inbounds i32* %a, i64 %index
1275 %gep.b = getelementptr inbounds i32* %b, i64 %index
1276 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1277 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1278 %load.a = load <8 x i32>* %ptr.a, align 2
1279 %load.b = load <8 x i32>* %ptr.b, align 2
1280 %cmp = icmp sge <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the signed maximum per lane.
1281 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1282 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1283 %index.next = add i64 %index, 8
1284 %loop = icmp eq i64 %index.next, 16384
1285 br i1 %loop, label %for.end, label %vector.body
1287 for.end: ; preds = %vector.body
1290 ; AVX2-LABEL: test44:
; test45: <8 x i32> unsigned min written as icmp ult + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1294 define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
1296 br label %vector.body
1298 vector.body: ; preds = %vector.body, %vector.ph
1299 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1300 %gep.a = getelementptr inbounds i32* %a, i64 %index
1301 %gep.b = getelementptr inbounds i32* %b, i64 %index
1302 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1303 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1304 %load.a = load <8 x i32>* %ptr.a, align 2
1305 %load.b = load <8 x i32>* %ptr.b, align 2
1306 %cmp = icmp ult <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned minimum per lane.
1307 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1308 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1309 %index.next = add i64 %index, 8
1310 %loop = icmp eq i64 %index.next, 16384
1311 br i1 %loop, label %for.end, label %vector.body
1313 for.end: ; preds = %vector.body
1316 ; AVX2-LABEL: test45:
; test46: <8 x i32> unsigned min written as icmp ule + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1320 define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
1322 br label %vector.body
1324 vector.body: ; preds = %vector.body, %vector.ph
1325 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1326 %gep.a = getelementptr inbounds i32* %a, i64 %index
1327 %gep.b = getelementptr inbounds i32* %b, i64 %index
1328 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1329 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1330 %load.a = load <8 x i32>* %ptr.a, align 2
1331 %load.b = load <8 x i32>* %ptr.b, align 2
1332 %cmp = icmp ule <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned minimum per lane.
1333 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1334 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1335 %index.next = add i64 %index, 8
1336 %loop = icmp eq i64 %index.next, 16384
1337 br i1 %loop, label %for.end, label %vector.body
1339 for.end: ; preds = %vector.body
1342 ; AVX2-LABEL: test46:
; test47: <8 x i32> unsigned max written as icmp ugt + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1346 define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
1348 br label %vector.body
1350 vector.body: ; preds = %vector.body, %vector.ph
1351 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1352 %gep.a = getelementptr inbounds i32* %a, i64 %index
1353 %gep.b = getelementptr inbounds i32* %b, i64 %index
1354 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1355 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1356 %load.a = load <8 x i32>* %ptr.a, align 2
1357 %load.b = load <8 x i32>* %ptr.b, align 2
1358 %cmp = icmp ugt <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned maximum per lane.
1359 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1360 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1361 %index.next = add i64 %index, 8
1362 %loop = icmp eq i64 %index.next, 16384
1363 br i1 %loop, label %for.end, label %vector.body
1365 for.end: ; preds = %vector.body
1368 ; AVX2-LABEL: test47:
; test48: <8 x i32> unsigned max written as icmp uge + select(%load.a, %load.b).
; The loop covers 16384 i32 elements in steps of 8 and stores every result
; back over the lanes it loaded from %a. All memory accesses use align 2.
; In the lines visible here only the AVX2 prefix has a label for this function.
1372 define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
1374 br label %vector.body
1376 vector.body: ; preds = %vector.body, %vector.ph
1377 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1378 %gep.a = getelementptr inbounds i32* %a, i64 %index
1379 %gep.b = getelementptr inbounds i32* %b, i64 %index
1380 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
1381 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
1382 %load.a = load <8 x i32>* %ptr.a, align 2
1383 %load.b = load <8 x i32>* %ptr.b, align 2
1384 %cmp = icmp uge <8 x i32> %load.a, %load.b
; True lanes keep %load.a, false lanes take %load.b: the unsigned maximum per lane.
1385 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
1386 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
1387 %index.next = add i64 %index, 8
1388 %loop = icmp eq i64 %index.next, 16384
1389 br i1 %loop, label %for.end, label %vector.body
1391 for.end: ; preds = %vector.body
1394 ; AVX2-LABEL: test48:
; test49: <16 x i8> signed max, expressed as icmp slt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1398 define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
1400 br label %vector.body
1402 vector.body: ; preds = %vector.body, %vector.ph
1403 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1404 %gep.a = getelementptr inbounds i8* %a, i64 %index
1405 %gep.b = getelementptr inbounds i8* %b, i64 %index
1406 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1407 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1408 %load.a = load <16 x i8>* %ptr.a, align 2
1409 %load.b = load <16 x i8>* %ptr.b, align 2
1410 %cmp = icmp slt <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1411 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1412 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1413 %index.next = add i64 %index, 16
1414 %loop = icmp eq i64 %index.next, 16384
1415 br i1 %loop, label %for.end, label %vector.body
1417 for.end: ; preds = %vector.body
1420 ; SSE4-LABEL: test49:
1423 ; AVX1-LABEL: test49:
1426 ; AVX2-LABEL: test49:
; test50: <16 x i8> signed max, expressed as icmp sle with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1430 define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
1432 br label %vector.body
1434 vector.body: ; preds = %vector.body, %vector.ph
1435 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1436 %gep.a = getelementptr inbounds i8* %a, i64 %index
1437 %gep.b = getelementptr inbounds i8* %b, i64 %index
1438 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1439 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1440 %load.a = load <16 x i8>* %ptr.a, align 2
1441 %load.b = load <16 x i8>* %ptr.b, align 2
1442 %cmp = icmp sle <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1443 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1444 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1445 %index.next = add i64 %index, 16
1446 %loop = icmp eq i64 %index.next, 16384
1447 br i1 %loop, label %for.end, label %vector.body
1449 for.end: ; preds = %vector.body
1452 ; SSE4-LABEL: test50:
1455 ; AVX1-LABEL: test50:
1458 ; AVX2-LABEL: test50:
; test51: <16 x i8> signed min, expressed as icmp sgt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1462 define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
1464 br label %vector.body
1466 vector.body: ; preds = %vector.body, %vector.ph
1467 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1468 %gep.a = getelementptr inbounds i8* %a, i64 %index
1469 %gep.b = getelementptr inbounds i8* %b, i64 %index
1470 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1471 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1472 %load.a = load <16 x i8>* %ptr.a, align 2
1473 %load.b = load <16 x i8>* %ptr.b, align 2
1474 %cmp = icmp sgt <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
1475 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1476 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1477 %index.next = add i64 %index, 16
1478 %loop = icmp eq i64 %index.next, 16384
1479 br i1 %loop, label %for.end, label %vector.body
1481 for.end: ; preds = %vector.body
1484 ; SSE4-LABEL: test51:
1487 ; AVX1-LABEL: test51:
1490 ; AVX2-LABEL: test51:
; test52: <16 x i8> signed min, expressed as icmp sge with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1494 define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
1496 br label %vector.body
1498 vector.body: ; preds = %vector.body, %vector.ph
1499 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1500 %gep.a = getelementptr inbounds i8* %a, i64 %index
1501 %gep.b = getelementptr inbounds i8* %b, i64 %index
1502 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1503 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1504 %load.a = load <16 x i8>* %ptr.a, align 2
1505 %load.b = load <16 x i8>* %ptr.b, align 2
1506 %cmp = icmp sge <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
1507 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1508 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1509 %index.next = add i64 %index, 16
1510 %loop = icmp eq i64 %index.next, 16384
1511 br i1 %loop, label %for.end, label %vector.body
1513 for.end: ; preds = %vector.body
1516 ; SSE4-LABEL: test52:
1519 ; AVX1-LABEL: test52:
1522 ; AVX2-LABEL: test52:
; test53: <16 x i8> unsigned max, expressed as icmp ult with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1526 define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
1528 br label %vector.body
1530 vector.body: ; preds = %vector.body, %vector.ph
1531 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1532 %gep.a = getelementptr inbounds i8* %a, i64 %index
1533 %gep.b = getelementptr inbounds i8* %b, i64 %index
1534 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1535 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1536 %load.a = load <16 x i8>* %ptr.a, align 2
1537 %load.b = load <16 x i8>* %ptr.b, align 2
1538 %cmp = icmp ult <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
1539 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1540 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1541 %index.next = add i64 %index, 16
1542 %loop = icmp eq i64 %index.next, 16384
1543 br i1 %loop, label %for.end, label %vector.body
1545 for.end: ; preds = %vector.body
1548 ; SSE2-LABEL: test53:
1551 ; AVX1-LABEL: test53:
1554 ; AVX2-LABEL: test53:
; test54: <16 x i8> unsigned max, expressed as icmp ule with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1558 define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
1560 br label %vector.body
1562 vector.body: ; preds = %vector.body, %vector.ph
1563 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1564 %gep.a = getelementptr inbounds i8* %a, i64 %index
1565 %gep.b = getelementptr inbounds i8* %b, i64 %index
1566 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1567 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1568 %load.a = load <16 x i8>* %ptr.a, align 2
1569 %load.b = load <16 x i8>* %ptr.b, align 2
1570 %cmp = icmp ule <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
1571 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1572 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1573 %index.next = add i64 %index, 16
1574 %loop = icmp eq i64 %index.next, 16384
1575 br i1 %loop, label %for.end, label %vector.body
1577 for.end: ; preds = %vector.body
1580 ; SSE2-LABEL: test54:
1583 ; AVX1-LABEL: test54:
1586 ; AVX2-LABEL: test54:
; test55: <16 x i8> unsigned min, expressed as icmp ugt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1590 define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
1592 br label %vector.body
1594 vector.body: ; preds = %vector.body, %vector.ph
1595 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1596 %gep.a = getelementptr inbounds i8* %a, i64 %index
1597 %gep.b = getelementptr inbounds i8* %b, i64 %index
1598 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1599 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1600 %load.a = load <16 x i8>* %ptr.a, align 2
1601 %load.b = load <16 x i8>* %ptr.b, align 2
1602 %cmp = icmp ugt <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned minimum per lane.
1603 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1604 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1605 %index.next = add i64 %index, 16
1606 %loop = icmp eq i64 %index.next, 16384
1607 br i1 %loop, label %for.end, label %vector.body
1609 for.end: ; preds = %vector.body
1612 ; SSE2-LABEL: test55:
1615 ; AVX1-LABEL: test55:
1618 ; AVX2-LABEL: test55:
; test56: <16 x i8> unsigned min, expressed as icmp uge with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i8 elements in
; steps of 16 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1622 define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
1624 br label %vector.body
1626 vector.body: ; preds = %vector.body, %vector.ph
1627 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1628 %gep.a = getelementptr inbounds i8* %a, i64 %index
1629 %gep.b = getelementptr inbounds i8* %b, i64 %index
1630 %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1631 %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1632 %load.a = load <16 x i8>* %ptr.a, align 2
1633 %load.b = load <16 x i8>* %ptr.b, align 2
1634 %cmp = icmp uge <16 x i8> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned minimum per lane.
1635 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
1636 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1637 %index.next = add i64 %index, 16
1638 %loop = icmp eq i64 %index.next, 16384
1639 br i1 %loop, label %for.end, label %vector.body
1641 for.end: ; preds = %vector.body
1644 ; SSE2-LABEL: test56:
1647 ; AVX1-LABEL: test56:
1650 ; AVX2-LABEL: test56:
; test57: <8 x i16> signed max, expressed as icmp slt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1654 define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
1656 br label %vector.body
1658 vector.body: ; preds = %vector.body, %vector.ph
1659 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1660 %gep.a = getelementptr inbounds i16* %a, i64 %index
1661 %gep.b = getelementptr inbounds i16* %b, i64 %index
1662 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1663 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1664 %load.a = load <8 x i16>* %ptr.a, align 2
1665 %load.b = load <8 x i16>* %ptr.b, align 2
1666 %cmp = icmp slt <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1667 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1668 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1669 %index.next = add i64 %index, 8
1670 %loop = icmp eq i64 %index.next, 16384
1671 br i1 %loop, label %for.end, label %vector.body
1673 for.end: ; preds = %vector.body
1676 ; SSE2-LABEL: test57:
1679 ; AVX1-LABEL: test57:
1682 ; AVX2-LABEL: test57:
; test58: <8 x i16> signed max, expressed as icmp sle with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1686 define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
1688 br label %vector.body
1690 vector.body: ; preds = %vector.body, %vector.ph
1691 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1692 %gep.a = getelementptr inbounds i16* %a, i64 %index
1693 %gep.b = getelementptr inbounds i16* %b, i64 %index
1694 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1695 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1696 %load.a = load <8 x i16>* %ptr.a, align 2
1697 %load.b = load <8 x i16>* %ptr.b, align 2
1698 %cmp = icmp sle <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1699 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1700 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1701 %index.next = add i64 %index, 8
1702 %loop = icmp eq i64 %index.next, 16384
1703 br i1 %loop, label %for.end, label %vector.body
1705 for.end: ; preds = %vector.body
1708 ; SSE2-LABEL: test58:
1711 ; AVX1-LABEL: test58:
1714 ; AVX2-LABEL: test58:
; test59: <8 x i16> signed min, expressed as icmp sgt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1718 define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
1720 br label %vector.body
1722 vector.body: ; preds = %vector.body, %vector.ph
1723 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1724 %gep.a = getelementptr inbounds i16* %a, i64 %index
1725 %gep.b = getelementptr inbounds i16* %b, i64 %index
1726 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1727 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1728 %load.a = load <8 x i16>* %ptr.a, align 2
1729 %load.b = load <8 x i16>* %ptr.b, align 2
1730 %cmp = icmp sgt <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
1731 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1732 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1733 %index.next = add i64 %index, 8
1734 %loop = icmp eq i64 %index.next, 16384
1735 br i1 %loop, label %for.end, label %vector.body
1737 for.end: ; preds = %vector.body
1740 ; SSE2-LABEL: test59:
1743 ; AVX1-LABEL: test59:
1746 ; AVX2-LABEL: test59:
; test60: <8 x i16> signed min, expressed as icmp sge with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE2, AVX1 and AVX2 prefixes.
1750 define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
1752 br label %vector.body
1754 vector.body: ; preds = %vector.body, %vector.ph
1755 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1756 %gep.a = getelementptr inbounds i16* %a, i64 %index
1757 %gep.b = getelementptr inbounds i16* %b, i64 %index
1758 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1759 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1760 %load.a = load <8 x i16>* %ptr.a, align 2
1761 %load.b = load <8 x i16>* %ptr.b, align 2
1762 %cmp = icmp sge <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
1763 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1764 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1765 %index.next = add i64 %index, 8
1766 %loop = icmp eq i64 %index.next, 16384
1767 br i1 %loop, label %for.end, label %vector.body
1769 for.end: ; preds = %vector.body
1772 ; SSE2-LABEL: test60:
1775 ; AVX1-LABEL: test60:
1778 ; AVX2-LABEL: test60:
; test61: <8 x i16> unsigned max, expressed as icmp ult with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1782 define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
1784 br label %vector.body
1786 vector.body: ; preds = %vector.body, %vector.ph
1787 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1788 %gep.a = getelementptr inbounds i16* %a, i64 %index
1789 %gep.b = getelementptr inbounds i16* %b, i64 %index
1790 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1791 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1792 %load.a = load <8 x i16>* %ptr.a, align 2
1793 %load.b = load <8 x i16>* %ptr.b, align 2
1794 %cmp = icmp ult <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
1795 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1796 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1797 %index.next = add i64 %index, 8
1798 %loop = icmp eq i64 %index.next, 16384
1799 br i1 %loop, label %for.end, label %vector.body
1801 for.end: ; preds = %vector.body
1804 ; SSE4-LABEL: test61:
1807 ; AVX1-LABEL: test61:
1810 ; AVX2-LABEL: test61:
; test62: <8 x i16> unsigned max, expressed as icmp ule with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1814 define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
1816 br label %vector.body
1818 vector.body: ; preds = %vector.body, %vector.ph
1819 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1820 %gep.a = getelementptr inbounds i16* %a, i64 %index
1821 %gep.b = getelementptr inbounds i16* %b, i64 %index
1822 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1823 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1824 %load.a = load <8 x i16>* %ptr.a, align 2
1825 %load.b = load <8 x i16>* %ptr.b, align 2
1826 %cmp = icmp ule <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
1827 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1828 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1829 %index.next = add i64 %index, 8
1830 %loop = icmp eq i64 %index.next, 16384
1831 br i1 %loop, label %for.end, label %vector.body
1833 for.end: ; preds = %vector.body
1836 ; SSE4-LABEL: test62:
1839 ; AVX1-LABEL: test62:
1842 ; AVX2-LABEL: test62:
; test63: <8 x i16> unsigned min, expressed as icmp ugt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1846 define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
1848 br label %vector.body
1850 vector.body: ; preds = %vector.body, %vector.ph
1851 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1852 %gep.a = getelementptr inbounds i16* %a, i64 %index
1853 %gep.b = getelementptr inbounds i16* %b, i64 %index
1854 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1855 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1856 %load.a = load <8 x i16>* %ptr.a, align 2
1857 %load.b = load <8 x i16>* %ptr.b, align 2
1858 %cmp = icmp ugt <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned minimum per lane.
1859 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1860 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1861 %index.next = add i64 %index, 8
1862 %loop = icmp eq i64 %index.next, 16384
1863 br i1 %loop, label %for.end, label %vector.body
1865 for.end: ; preds = %vector.body
1868 ; SSE4-LABEL: test63:
1871 ; AVX1-LABEL: test63:
1874 ; AVX2-LABEL: test63:
; test64: <8 x i16> unsigned min, expressed as icmp uge with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i16 elements in
; steps of 8 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1878 define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
1880 br label %vector.body
1882 vector.body: ; preds = %vector.body, %vector.ph
1883 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1884 %gep.a = getelementptr inbounds i16* %a, i64 %index
1885 %gep.b = getelementptr inbounds i16* %b, i64 %index
1886 %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1887 %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1888 %load.a = load <8 x i16>* %ptr.a, align 2
1889 %load.b = load <8 x i16>* %ptr.b, align 2
1890 %cmp = icmp uge <8 x i16> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned minimum per lane.
1891 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
1892 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1893 %index.next = add i64 %index, 8
1894 %loop = icmp eq i64 %index.next, 16384
1895 br i1 %loop, label %for.end, label %vector.body
1897 for.end: ; preds = %vector.body
1900 ; SSE4-LABEL: test64:
1903 ; AVX1-LABEL: test64:
1906 ; AVX2-LABEL: test64:
; test65: <4 x i32> signed max, expressed as icmp slt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1910 define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
1912 br label %vector.body
1914 vector.body: ; preds = %vector.body, %vector.ph
1915 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1916 %gep.a = getelementptr inbounds i32* %a, i64 %index
1917 %gep.b = getelementptr inbounds i32* %b, i64 %index
1918 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
1919 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
1920 %load.a = load <4 x i32>* %ptr.a, align 2
1921 %load.b = load <4 x i32>* %ptr.b, align 2
1922 %cmp = icmp slt <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1923 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
1924 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
1925 %index.next = add i64 %index, 4
1926 %loop = icmp eq i64 %index.next, 16384
1927 br i1 %loop, label %for.end, label %vector.body
1929 for.end: ; preds = %vector.body
1932 ; SSE4-LABEL: test65:
1935 ; AVX1-LABEL: test65:
1938 ; AVX2-LABEL: test65:
; test66: <4 x i32> signed max, expressed as icmp sle with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1942 define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
1944 br label %vector.body
1946 vector.body: ; preds = %vector.body, %vector.ph
1947 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1948 %gep.a = getelementptr inbounds i32* %a, i64 %index
1949 %gep.b = getelementptr inbounds i32* %b, i64 %index
1950 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
1951 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
1952 %load.a = load <4 x i32>* %ptr.a, align 2
1953 %load.b = load <4 x i32>* %ptr.b, align 2
1954 %cmp = icmp sle <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed maximum per lane.
1955 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
1956 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
1957 %index.next = add i64 %index, 4
1958 %loop = icmp eq i64 %index.next, 16384
1959 br i1 %loop, label %for.end, label %vector.body
1961 for.end: ; preds = %vector.body
1964 ; SSE4-LABEL: test66:
1967 ; AVX1-LABEL: test66:
1970 ; AVX2-LABEL: test66:
; test67: <4 x i32> signed min, expressed as icmp sgt with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
1974 define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
1976 br label %vector.body
1978 vector.body: ; preds = %vector.body, %vector.ph
1979 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1980 %gep.a = getelementptr inbounds i32* %a, i64 %index
1981 %gep.b = getelementptr inbounds i32* %b, i64 %index
1982 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
1983 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
1984 %load.a = load <4 x i32>* %ptr.a, align 2
1985 %load.b = load <4 x i32>* %ptr.b, align 2
1986 %cmp = icmp sgt <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
1987 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
1988 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
1989 %index.next = add i64 %index, 4
1990 %loop = icmp eq i64 %index.next, 16384
1991 br i1 %loop, label %for.end, label %vector.body
1993 for.end: ; preds = %vector.body
1996 ; SSE4-LABEL: test67:
1999 ; AVX1-LABEL: test67:
2002 ; AVX2-LABEL: test67:
; test68: <4 x i32> signed min, expressed as icmp sge with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
2006 define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
2008 br label %vector.body
2010 vector.body: ; preds = %vector.body, %vector.ph
2011 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2012 %gep.a = getelementptr inbounds i32* %a, i64 %index
2013 %gep.b = getelementptr inbounds i32* %b, i64 %index
2014 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2015 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2016 %load.a = load <4 x i32>* %ptr.a, align 2
2017 %load.b = load <4 x i32>* %ptr.b, align 2
2018 %cmp = icmp sge <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the signed minimum per lane.
2019 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
2020 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2021 %index.next = add i64 %index, 4
2022 %loop = icmp eq i64 %index.next, 16384
2023 br i1 %loop, label %for.end, label %vector.body
2025 for.end: ; preds = %vector.body
2028 ; SSE4-LABEL: test68:
2031 ; AVX1-LABEL: test68:
2034 ; AVX2-LABEL: test68:
; test69: <4 x i32> unsigned max, expressed as icmp ult with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
2038 define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
2040 br label %vector.body
2042 vector.body: ; preds = %vector.body, %vector.ph
2043 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2044 %gep.a = getelementptr inbounds i32* %a, i64 %index
2045 %gep.b = getelementptr inbounds i32* %b, i64 %index
2046 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2047 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2048 %load.a = load <4 x i32>* %ptr.a, align 2
2049 %load.b = load <4 x i32>* %ptr.b, align 2
2050 %cmp = icmp ult <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
2051 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
2052 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2053 %index.next = add i64 %index, 4
2054 %loop = icmp eq i64 %index.next, 16384
2055 br i1 %loop, label %for.end, label %vector.body
2057 for.end: ; preds = %vector.body
2060 ; SSE4-LABEL: test69:
2063 ; AVX1-LABEL: test69:
2066 ; AVX2-LABEL: test69:
; test70: <4 x i32> unsigned max, expressed as icmp ule with the select operands
; swapped (%load.b first, %load.a second). The loop covers 16384 i32 elements in
; steps of 4 and stores every result back over the lanes it loaded from %a.
; Visible labels cover the SSE4, AVX1 and AVX2 prefixes.
2070 define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
2072 br label %vector.body
2074 vector.body: ; preds = %vector.body, %vector.ph
2075 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2076 %gep.a = getelementptr inbounds i32* %a, i64 %index
2077 %gep.b = getelementptr inbounds i32* %b, i64 %index
2078 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2079 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2080 %load.a = load <4 x i32>* %ptr.a, align 2
2081 %load.b = load <4 x i32>* %ptr.b, align 2
2082 %cmp = icmp ule <4 x i32> %load.a, %load.b
; True lanes take %load.b, false lanes keep %load.a: the unsigned maximum per lane.
2083 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
2084 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2085 %index.next = add i64 %index, 4
2086 %loop = icmp eq i64 %index.next, 16384
2087 br i1 %loop, label %for.end, label %vector.body
2089 for.end: ; preds = %vector.body
2092 ; SSE4-LABEL: test70:
2095 ; AVX1-LABEL: test70:
2098 ; AVX2-LABEL: test70:
; @test71: walks 16384 i32 elements of %a and %b, 4 lanes per iteration, and
; stores select(a ugt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned minimum.
2102 define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
2104 br label %vector.body
2106 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2107 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2108 %gep.a = getelementptr inbounds i32* %a, i64 %index
2109 %gep.b = getelementptr inbounds i32* %b, i64 %index
2110 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2111 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
; The vector accesses declare only 2-byte alignment.
2112 %load.a = load <4 x i32>* %ptr.a, align 2
2113 %load.b = load <4 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (unsigned), otherwise keep %load.a.
2114 %cmp = icmp ugt <4 x i32> %load.a, %load.b
2115 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
2116 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Advance by 4 elements; leave the loop once the index reaches 16384.
2117 %index.next = add i64 %index, 4
2118 %loop = icmp eq i64 %index.next, 16384
2119 br i1 %loop, label %for.end, label %vector.body
2121 for.end: ; preds = %vector.body
; Per-function FileCheck anchors for the SSE4, AVX1 and AVX2 run lines.
2124 ; SSE4-LABEL: test71:
2127 ; AVX1-LABEL: test71:
2130 ; AVX2-LABEL: test71:
; @test72: walks 16384 i32 elements of %a and %b, 4 lanes per iteration, and
; stores select(a uge b, b, a) back to %a. The swapped select operands make
; each lane the unsigned minimum (equal lanes pick %load.b, the same value).
2134 define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
2136 br label %vector.body
2138 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2139 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2140 %gep.a = getelementptr inbounds i32* %a, i64 %index
2141 %gep.b = getelementptr inbounds i32* %b, i64 %index
2142 %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2143 %ptr.b = bitcast i32* %gep.b to <4 x i32>*
; The vector accesses declare only 2-byte alignment.
2144 %load.a = load <4 x i32>* %ptr.a, align 2
2145 %load.b = load <4 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (unsigned), otherwise keep %load.a.
2146 %cmp = icmp uge <4 x i32> %load.a, %load.b
2147 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
2148 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Advance by 4 elements; leave the loop once the index reaches 16384.
2149 %index.next = add i64 %index, 4
2150 %loop = icmp eq i64 %index.next, 16384
2151 br i1 %loop, label %for.end, label %vector.body
2153 for.end: ; preds = %vector.body
; Per-function FileCheck anchors for the SSE4, AVX1 and AVX2 run lines.
2156 ; SSE4-LABEL: test72:
2159 ; AVX1-LABEL: test72:
2162 ; AVX2-LABEL: test72:
; @test73: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a slt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed maximum.
2166 define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
2168 br label %vector.body
2170 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2171 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2172 %gep.a = getelementptr inbounds i8* %a, i64 %index
2173 %gep.b = getelementptr inbounds i8* %b, i64 %index
2174 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2175 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2176 %load.a = load <32 x i8>* %ptr.a, align 2
2177 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (signed), otherwise keep %load.a.
2178 %cmp = icmp slt <32 x i8> %load.a, %load.b
2179 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2180 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2181 %index.next = add i64 %index, 32
2182 %loop = icmp eq i64 %index.next, 16384
2183 br i1 %loop, label %for.end, label %vector.body
2185 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2188 ; AVX2-LABEL: test73:
; @test74: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a sle b, b, a) back to %a. The swapped select operands make
; each lane the signed maximum (equal lanes pick %load.b, the same value).
2192 define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
2194 br label %vector.body
2196 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2197 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2198 %gep.a = getelementptr inbounds i8* %a, i64 %index
2199 %gep.b = getelementptr inbounds i8* %b, i64 %index
2200 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2201 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2202 %load.a = load <32 x i8>* %ptr.a, align 2
2203 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (signed), otherwise keep %load.a.
2204 %cmp = icmp sle <32 x i8> %load.a, %load.b
2205 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2206 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2207 %index.next = add i64 %index, 32
2208 %loop = icmp eq i64 %index.next, 16384
2209 br i1 %loop, label %for.end, label %vector.body
2211 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2214 ; AVX2-LABEL: test74:
; @test75: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a sgt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed minimum.
2218 define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
2220 br label %vector.body
2222 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2223 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2224 %gep.a = getelementptr inbounds i8* %a, i64 %index
2225 %gep.b = getelementptr inbounds i8* %b, i64 %index
2226 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2227 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2228 %load.a = load <32 x i8>* %ptr.a, align 2
2229 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (signed), otherwise keep %load.a.
2230 %cmp = icmp sgt <32 x i8> %load.a, %load.b
2231 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2232 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2233 %index.next = add i64 %index, 32
2234 %loop = icmp eq i64 %index.next, 16384
2235 br i1 %loop, label %for.end, label %vector.body
2237 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2240 ; AVX2-LABEL: test75:
; @test76: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a sge b, b, a) back to %a. The swapped select operands make
; each lane the signed minimum (equal lanes pick %load.b, the same value).
2244 define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
2246 br label %vector.body
2248 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2249 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2250 %gep.a = getelementptr inbounds i8* %a, i64 %index
2251 %gep.b = getelementptr inbounds i8* %b, i64 %index
2252 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2253 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2254 %load.a = load <32 x i8>* %ptr.a, align 2
2255 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (signed), otherwise keep %load.a.
2256 %cmp = icmp sge <32 x i8> %load.a, %load.b
2257 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2258 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2259 %index.next = add i64 %index, 32
2260 %loop = icmp eq i64 %index.next, 16384
2261 br i1 %loop, label %for.end, label %vector.body
2263 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2266 ; AVX2-LABEL: test76:
; @test77: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a ult b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned maximum.
2270 define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
2272 br label %vector.body
2274 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2275 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2276 %gep.a = getelementptr inbounds i8* %a, i64 %index
2277 %gep.b = getelementptr inbounds i8* %b, i64 %index
2278 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2279 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2280 %load.a = load <32 x i8>* %ptr.a, align 2
2281 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (unsigned), otherwise keep %load.a.
2282 %cmp = icmp ult <32 x i8> %load.a, %load.b
2283 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2284 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2285 %index.next = add i64 %index, 32
2286 %loop = icmp eq i64 %index.next, 16384
2287 br i1 %loop, label %for.end, label %vector.body
2289 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2292 ; AVX2-LABEL: test77:
; @test78: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a ule b, b, a) back to %a. The swapped select operands make
; each lane the unsigned maximum (equal lanes pick %load.b, the same value).
2296 define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
2298 br label %vector.body
2300 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2301 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2302 %gep.a = getelementptr inbounds i8* %a, i64 %index
2303 %gep.b = getelementptr inbounds i8* %b, i64 %index
2304 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2305 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2306 %load.a = load <32 x i8>* %ptr.a, align 2
2307 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (unsigned), otherwise keep %load.a.
2308 %cmp = icmp ule <32 x i8> %load.a, %load.b
2309 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2310 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2311 %index.next = add i64 %index, 32
2312 %loop = icmp eq i64 %index.next, 16384
2313 br i1 %loop, label %for.end, label %vector.body
2315 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2318 ; AVX2-LABEL: test78:
; @test79: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a ugt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned minimum.
2322 define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
2324 br label %vector.body
2326 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2327 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2328 %gep.a = getelementptr inbounds i8* %a, i64 %index
2329 %gep.b = getelementptr inbounds i8* %b, i64 %index
2330 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2331 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2332 %load.a = load <32 x i8>* %ptr.a, align 2
2333 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (unsigned), otherwise keep %load.a.
2334 %cmp = icmp ugt <32 x i8> %load.a, %load.b
2335 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2336 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2337 %index.next = add i64 %index, 32
2338 %loop = icmp eq i64 %index.next, 16384
2339 br i1 %loop, label %for.end, label %vector.body
2341 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2344 ; AVX2-LABEL: test79:
; @test80: walks 16384 i8 elements of %a and %b, 32 lanes per iteration, and
; stores select(a uge b, b, a) back to %a. The swapped select operands make
; each lane the unsigned minimum (equal lanes pick %load.b, the same value).
2348 define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
2350 br label %vector.body
2352 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2353 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2354 %gep.a = getelementptr inbounds i8* %a, i64 %index
2355 %gep.b = getelementptr inbounds i8* %b, i64 %index
2356 %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2357 %ptr.b = bitcast i8* %gep.b to <32 x i8>*
; The vector accesses declare only 2-byte alignment.
2358 %load.a = load <32 x i8>* %ptr.a, align 2
2359 %load.b = load <32 x i8>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (unsigned), otherwise keep %load.a.
2360 %cmp = icmp uge <32 x i8> %load.a, %load.b
2361 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
2362 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Advance by 32 elements; leave the loop once the index reaches 16384.
2363 %index.next = add i64 %index, 32
2364 %loop = icmp eq i64 %index.next, 16384
2365 br i1 %loop, label %for.end, label %vector.body
2367 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2370 ; AVX2-LABEL: test80:
; @test81: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a slt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed maximum.
2374 define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
2376 br label %vector.body
2378 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2379 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2380 %gep.a = getelementptr inbounds i16* %a, i64 %index
2381 %gep.b = getelementptr inbounds i16* %b, i64 %index
2382 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2383 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2384 %load.a = load <16 x i16>* %ptr.a, align 2
2385 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (signed), otherwise keep %load.a.
2386 %cmp = icmp slt <16 x i16> %load.a, %load.b
2387 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2388 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2389 %index.next = add i64 %index, 16
2390 %loop = icmp eq i64 %index.next, 16384
2391 br i1 %loop, label %for.end, label %vector.body
2393 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2396 ; AVX2-LABEL: test81:
; @test82: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a sle b, b, a) back to %a. The swapped select operands make
; each lane the signed maximum (equal lanes pick %load.b, the same value).
2400 define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
2402 br label %vector.body
2404 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2405 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2406 %gep.a = getelementptr inbounds i16* %a, i64 %index
2407 %gep.b = getelementptr inbounds i16* %b, i64 %index
2408 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2409 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2410 %load.a = load <16 x i16>* %ptr.a, align 2
2411 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (signed), otherwise keep %load.a.
2412 %cmp = icmp sle <16 x i16> %load.a, %load.b
2413 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2414 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2415 %index.next = add i64 %index, 16
2416 %loop = icmp eq i64 %index.next, 16384
2417 br i1 %loop, label %for.end, label %vector.body
2419 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2422 ; AVX2-LABEL: test82:
; @test83: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a sgt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed minimum.
2426 define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
2428 br label %vector.body
2430 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2431 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2432 %gep.a = getelementptr inbounds i16* %a, i64 %index
2433 %gep.b = getelementptr inbounds i16* %b, i64 %index
2434 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2435 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2436 %load.a = load <16 x i16>* %ptr.a, align 2
2437 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (signed), otherwise keep %load.a.
2438 %cmp = icmp sgt <16 x i16> %load.a, %load.b
2439 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2440 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2441 %index.next = add i64 %index, 16
2442 %loop = icmp eq i64 %index.next, 16384
2443 br i1 %loop, label %for.end, label %vector.body
2445 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2448 ; AVX2-LABEL: test83:
; @test84: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a sge b, b, a) back to %a. The swapped select operands make
; each lane the signed minimum (equal lanes pick %load.b, the same value).
2452 define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
2454 br label %vector.body
2456 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2457 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2458 %gep.a = getelementptr inbounds i16* %a, i64 %index
2459 %gep.b = getelementptr inbounds i16* %b, i64 %index
2460 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2461 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2462 %load.a = load <16 x i16>* %ptr.a, align 2
2463 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (signed), otherwise keep %load.a.
2464 %cmp = icmp sge <16 x i16> %load.a, %load.b
2465 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2466 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2467 %index.next = add i64 %index, 16
2468 %loop = icmp eq i64 %index.next, 16384
2469 br i1 %loop, label %for.end, label %vector.body
2471 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2474 ; AVX2-LABEL: test84:
; @test85: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a ult b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned maximum.
2478 define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
2480 br label %vector.body
2482 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2483 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2484 %gep.a = getelementptr inbounds i16* %a, i64 %index
2485 %gep.b = getelementptr inbounds i16* %b, i64 %index
2486 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2487 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2488 %load.a = load <16 x i16>* %ptr.a, align 2
2489 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (unsigned), otherwise keep %load.a.
2490 %cmp = icmp ult <16 x i16> %load.a, %load.b
2491 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2492 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2493 %index.next = add i64 %index, 16
2494 %loop = icmp eq i64 %index.next, 16384
2495 br i1 %loop, label %for.end, label %vector.body
2497 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2500 ; AVX2-LABEL: test85:
; @test86: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a ule b, b, a) back to %a. The swapped select operands make
; each lane the unsigned maximum (equal lanes pick %load.b, the same value).
2504 define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
2506 br label %vector.body
2508 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2509 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2510 %gep.a = getelementptr inbounds i16* %a, i64 %index
2511 %gep.b = getelementptr inbounds i16* %b, i64 %index
2512 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2513 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2514 %load.a = load <16 x i16>* %ptr.a, align 2
2515 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (unsigned), otherwise keep %load.a.
2516 %cmp = icmp ule <16 x i16> %load.a, %load.b
2517 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2518 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2519 %index.next = add i64 %index, 16
2520 %loop = icmp eq i64 %index.next, 16384
2521 br i1 %loop, label %for.end, label %vector.body
2523 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2526 ; AVX2-LABEL: test86:
; @test87: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a ugt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned minimum.
2530 define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
2532 br label %vector.body
2534 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2535 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2536 %gep.a = getelementptr inbounds i16* %a, i64 %index
2537 %gep.b = getelementptr inbounds i16* %b, i64 %index
2538 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2539 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2540 %load.a = load <16 x i16>* %ptr.a, align 2
2541 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (unsigned), otherwise keep %load.a.
2542 %cmp = icmp ugt <16 x i16> %load.a, %load.b
2543 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2544 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2545 %index.next = add i64 %index, 16
2546 %loop = icmp eq i64 %index.next, 16384
2547 br i1 %loop, label %for.end, label %vector.body
2549 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2552 ; AVX2-LABEL: test87:
; @test88: walks 16384 i16 elements of %a and %b, 16 lanes per iteration, and
; stores select(a uge b, b, a) back to %a. The swapped select operands make
; each lane the unsigned minimum (equal lanes pick %load.b, the same value).
2556 define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
2558 br label %vector.body
2560 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2561 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2562 %gep.a = getelementptr inbounds i16* %a, i64 %index
2563 %gep.b = getelementptr inbounds i16* %b, i64 %index
2564 %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2565 %ptr.b = bitcast i16* %gep.b to <16 x i16>*
; The vector accesses declare only 2-byte alignment.
2566 %load.a = load <16 x i16>* %ptr.a, align 2
2567 %load.b = load <16 x i16>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (unsigned), otherwise keep %load.a.
2568 %cmp = icmp uge <16 x i16> %load.a, %load.b
2569 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
2570 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
; Advance by 16 elements; leave the loop once the index reaches 16384.
2571 %index.next = add i64 %index, 16
2572 %loop = icmp eq i64 %index.next, 16384
2573 br i1 %loop, label %for.end, label %vector.body
2575 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2578 ; AVX2-LABEL: test88:
; @test89: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a slt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed maximum.
2582 define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
2584 br label %vector.body
2586 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2587 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2588 %gep.a = getelementptr inbounds i32* %a, i64 %index
2589 %gep.b = getelementptr inbounds i32* %b, i64 %index
2590 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2591 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2592 %load.a = load <8 x i32>* %ptr.a, align 2
2593 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (signed), otherwise keep %load.a.
2594 %cmp = icmp slt <8 x i32> %load.a, %load.b
2595 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2596 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2597 %index.next = add i64 %index, 8
2598 %loop = icmp eq i64 %index.next, 16384
2599 br i1 %loop, label %for.end, label %vector.body
2601 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2604 ; AVX2-LABEL: test89:
; @test90: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a sle b, b, a) back to %a. The swapped select operands make
; each lane the signed maximum (equal lanes pick %load.b, the same value).
2608 define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
2610 br label %vector.body
2612 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2613 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2614 %gep.a = getelementptr inbounds i32* %a, i64 %index
2615 %gep.b = getelementptr inbounds i32* %b, i64 %index
2616 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2617 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2618 %load.a = load <8 x i32>* %ptr.a, align 2
2619 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (signed), otherwise keep %load.a.
2620 %cmp = icmp sle <8 x i32> %load.a, %load.b
2621 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2622 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2623 %index.next = add i64 %index, 8
2624 %loop = icmp eq i64 %index.next, 16384
2625 br i1 %loop, label %for.end, label %vector.body
2627 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2630 ; AVX2-LABEL: test90:
; @test91: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a sgt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the signed minimum.
2634 define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
2636 br label %vector.body
2638 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2639 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2640 %gep.a = getelementptr inbounds i32* %a, i64 %index
2641 %gep.b = getelementptr inbounds i32* %b, i64 %index
2642 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2643 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2644 %load.a = load <8 x i32>* %ptr.a, align 2
2645 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (signed), otherwise keep %load.a.
2646 %cmp = icmp sgt <8 x i32> %load.a, %load.b
2647 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2648 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2649 %index.next = add i64 %index, 8
2650 %loop = icmp eq i64 %index.next, 16384
2651 br i1 %loop, label %for.end, label %vector.body
2653 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2656 ; AVX2-LABEL: test91:
; @test92: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a sge b, b, a) back to %a. The swapped select operands make
; each lane the signed minimum (equal lanes pick %load.b, the same value).
2660 define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
2662 br label %vector.body
2664 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2665 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2666 %gep.a = getelementptr inbounds i32* %a, i64 %index
2667 %gep.b = getelementptr inbounds i32* %b, i64 %index
2668 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2669 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2670 %load.a = load <8 x i32>* %ptr.a, align 2
2671 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (signed), otherwise keep %load.a.
2672 %cmp = icmp sge <8 x i32> %load.a, %load.b
2673 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2674 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2675 %index.next = add i64 %index, 8
2676 %loop = icmp eq i64 %index.next, 16384
2677 br i1 %loop, label %for.end, label %vector.body
2679 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2682 ; AVX2-LABEL: test92:
; @test93: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a ult b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned maximum.
2686 define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
2688 br label %vector.body
2690 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2691 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2692 %gep.a = getelementptr inbounds i32* %a, i64 %index
2693 %gep.b = getelementptr inbounds i32* %b, i64 %index
2694 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2695 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2696 %load.a = load <8 x i32>* %ptr.a, align 2
2697 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a < b (unsigned), otherwise keep %load.a.
2698 %cmp = icmp ult <8 x i32> %load.a, %load.b
2699 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2700 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2701 %index.next = add i64 %index, 8
2702 %loop = icmp eq i64 %index.next, 16384
2703 br i1 %loop, label %for.end, label %vector.body
2705 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2708 ; AVX2-LABEL: test93:
; @test94: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a ule b, b, a) back to %a. The swapped select operands make
; each lane the unsigned maximum (equal lanes pick %load.b, the same value).
2712 define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
2714 br label %vector.body
2716 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2717 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2718 %gep.a = getelementptr inbounds i32* %a, i64 %index
2719 %gep.b = getelementptr inbounds i32* %b, i64 %index
2720 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2721 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2722 %load.a = load <8 x i32>* %ptr.a, align 2
2723 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a <= b (unsigned), otherwise keep %load.a.
2724 %cmp = icmp ule <8 x i32> %load.a, %load.b
2725 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2726 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2727 %index.next = add i64 %index, 8
2728 %loop = icmp eq i64 %index.next, 16384
2729 br i1 %loop, label %for.end, label %vector.body
2731 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2734 ; AVX2-LABEL: test94:
; @test95: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a ugt b, b, a) back to %a. With the select operands swapped
; relative to the compare, every lane ends up holding the unsigned minimum.
2738 define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
2740 br label %vector.body
2742 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2743 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2744 %gep.a = getelementptr inbounds i32* %a, i64 %index
2745 %gep.b = getelementptr inbounds i32* %b, i64 %index
2746 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2747 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2748 %load.a = load <8 x i32>* %ptr.a, align 2
2749 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a > b (unsigned), otherwise keep %load.a.
2750 %cmp = icmp ugt <8 x i32> %load.a, %load.b
2751 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2752 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2753 %index.next = add i64 %index, 8
2754 %loop = icmp eq i64 %index.next, 16384
2755 br i1 %loop, label %for.end, label %vector.body
2757 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2760 ; AVX2-LABEL: test95:
; @test96: walks 16384 i32 elements of %a and %b, 8 lanes per iteration, and
; stores select(a uge b, b, a) back to %a. The swapped select operands make
; each lane the unsigned minimum (equal lanes pick %load.b, the same value).
2764 define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
2766 br label %vector.body
2768 vector.body: ; preds = %vector.body, %vector.ph
; %index is the element offset: 0 when entered from %vector.ph, else %index.next.
2769 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2770 %gep.a = getelementptr inbounds i32* %a, i64 %index
2771 %gep.b = getelementptr inbounds i32* %b, i64 %index
2772 %ptr.a = bitcast i32* %gep.a to <8 x i32>*
2773 %ptr.b = bitcast i32* %gep.b to <8 x i32>*
; The vector accesses declare only 2-byte alignment.
2774 %load.a = load <8 x i32>* %ptr.a, align 2
2775 %load.b = load <8 x i32>* %ptr.b, align 2
; Lane-wise: take %load.b where a >= b (unsigned), otherwise keep %load.a.
2776 %cmp = icmp uge <8 x i32> %load.a, %load.b
2777 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
2778 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
; Advance by 8 elements; leave the loop once the index reaches 16384.
2779 %index.next = add i64 %index, 8
2780 %loop = icmp eq i64 %index.next, 16384
2781 br i1 %loop, label %for.end, label %vector.body
2783 for.end: ; preds = %vector.body
; Per-function FileCheck anchor; only the AVX2 run line has one here.
2786 ; AVX2-LABEL: test96: