1 ; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
; Data layout: little-endian ("e") with 32-bit pointers ("p:32:32:32").
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
; Field 0 and field 2 are 100-element i32 arrays separated by a single i32 (field 1).
5 %struct.anon = type { [100 x i32], i32, [100 x i32] }
; Same shape as %struct.anon, but fields 0 and 2 are 100x100 i32 matrices.
6 %struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
; Zero-initialized common globals that the array-based tests index into.
8 @Foo = common global %struct.anon zeroinitializer, align 4
9 @Bar = common global %struct.anon.0 zeroinitializer, align 4
; External i32* globals used by the pointer-based tests (noAlias04, noAlias10, mayAlias03).
11 @PB = external global i32*
12 @PA = external global i32*
15 ;; === First, the tests that should always vectorize, either statically or by adding run-time checks ===
18 ; /// Different objects, positive induction, constant distance
19 ; int noAlias01 (int a) {
21 ; for (i=0; i<SIZE; i++)
22 ; Foo.A[i] = Foo.B[i] + a;
25 ; CHECK: define i32 @noAlias01
26 ; CHECK: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i] = field 2 of @Foo at [i] + %a.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
29 define i32 @noAlias01(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
31 %a.addr = alloca i32, align 4
32 %i = alloca i32, align 4
33 store i32 %a, i32* %a.addr, align 4
34 store i32 0, i32* %i, align 4
; Loop test: i < 100.
37 for.cond: ; preds = %for.inc, %entry
38 %0 = load i32* %i, align 4
39 %cmp = icmp slt i32 %0, 100
40 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [i], add %a, write field 0 at [i].
42 for.body: ; preds = %for.cond
43 %1 = load i32* %i, align 4
44 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
45 %2 = load i32* %arrayidx, align 4
46 %3 = load i32* %a.addr, align 4
47 %add = add nsw i32 %2, %3
48 %4 = load i32* %i, align 4
49 %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
50 store i32 %add, i32* %arrayidx1, align 4
; i += 1.
53 for.inc: ; preds = %for.body
54 %5 = load i32* %i, align 4
55 %inc = add nsw i32 %5, 1
56 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
59 for.end: ; preds = %for.cond
60 %6 = load i32* %a.addr, align 4
61 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
62 %7 = load i32* %arrayidx2, align 4
66 ; /// Different objects, positive induction with widening slide
67 ; int noAlias02 (int a) {
69 ; for (i=0; i<SIZE-10; i++)
70 ; Foo.A[i] = Foo.B[i+10] + a;
73 ; CHECK: define i32 @noAlias02
74 ; CHECK: add nsw <4 x i32>
; For i in [0, 90): field 0 of @Foo at [i] = field 2 of @Foo at [i + 10] + %a.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
77 define i32 @noAlias02(i32 %a) {
; %a and the loop counter i live in stack slots; i starts at 0.
79 %a.addr = alloca i32, align 4
80 %i = alloca i32, align 4
81 store i32 %a, i32* %a.addr, align 4
82 store i32 0, i32* %i, align 4
; Loop test: i < 90, so the read index i + 10 stays below 100.
85 for.cond: ; preds = %for.inc, %entry
86 %0 = load i32* %i, align 4
87 %cmp = icmp slt i32 %0, 90
88 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [i + 10], add %a, write field 0 at [i].
90 for.body: ; preds = %for.cond
91 %1 = load i32* %i, align 4
92 %add = add nsw i32 %1, 10
93 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
94 %2 = load i32* %arrayidx, align 4
95 %3 = load i32* %a.addr, align 4
96 %add1 = add nsw i32 %2, %3
97 %4 = load i32* %i, align 4
98 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
99 store i32 %add1, i32* %arrayidx2, align 4
; i += 1.
102 for.inc: ; preds = %for.body
103 %5 = load i32* %i, align 4
104 %inc = add nsw i32 %5, 1
105 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
108 for.end: ; preds = %for.cond
109 %6 = load i32* %a.addr, align 4
110 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
111 %7 = load i32* %arrayidx3, align 4
115 ; /// Different objects, positive induction with shortening slide
116 ; int noAlias03 (int a) {
118 ; for (i=0; i<SIZE; i++)
119 ; Foo.A[i+10] = Foo.B[i] + a;
122 ; CHECK: define i32 @noAlias03
123 ; CHECK: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i + 10] = field 2 of @Foo at [i] + %a.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
126 define i32 @noAlias03(i32 %a) {
; %a and the loop counter i live in stack slots; i starts at 0.
128 %a.addr = alloca i32, align 4
129 %i = alloca i32, align 4
130 store i32 %a, i32* %a.addr, align 4
131 store i32 0, i32* %i, align 4
; Loop test: i < 100.
134 for.cond: ; preds = %for.inc, %entry
135 %0 = load i32* %i, align 4
136 %cmp = icmp slt i32 %0, 100
137 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [i], add %a, write field 0 at [i + 10].
139 for.body: ; preds = %for.cond
140 %1 = load i32* %i, align 4
141 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
142 %2 = load i32* %arrayidx, align 4
143 %3 = load i32* %a.addr, align 4
144 %add = add nsw i32 %2, %3
145 %4 = load i32* %i, align 4
146 %add1 = add nsw i32 %4, 10
147 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
148 store i32 %add, i32* %arrayidx2, align 4
; i += 1.
151 for.inc: ; preds = %for.body
152 %5 = load i32* %i, align 4
153 %inc = add nsw i32 %5, 1
154 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
157 for.end: ; preds = %for.cond
158 %6 = load i32* %a.addr, align 4
159 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
160 %7 = load i32* %arrayidx3, align 4
164 ; /// Pointer access, positive stride, run-time check added
165 ; int noAlias04 (int a) {
167 ; for (i=0; i<SIZE; i++)
168 ; *(PA+i) = *(PB+i) + a;
171 ; CHECK: define i32 @noAlias04
172 ; CHECK-NOT: add nsw <4 x i32>
175 ; TODO: This test vectorizes (with run-time check) on real targets with -O3.
176 ; Check why it's not being vectorized even when forcing vectorization.
; For i in [0, 100): *(PA + i) = *(PB + i) + %a, with PA/PB reloaded from the
; @PA/@PB globals on every iteration.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
178 define i32 @noAlias04(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
180 %a.addr = alloca i32, align 4
181 %i = alloca i32, align 4
182 store i32 %a, i32* %a.addr, align 4
183 store i32 0, i32* %i, align 4
; Loop test: i < 100.
186 for.cond: ; preds = %for.inc, %entry
187 %0 = load i32* %i, align 4
188 %cmp = icmp slt i32 %0, 100
189 br i1 %cmp, label %for.body, label %for.end
; Read through the pointer in @PB at offset i, add %a, write through the pointer in @PA at offset i.
191 for.body: ; preds = %for.cond
192 %1 = load i32** @PB, align 4
193 %2 = load i32* %i, align 4
194 %add.ptr = getelementptr inbounds i32* %1, i32 %2
195 %3 = load i32* %add.ptr, align 4
196 %4 = load i32* %a.addr, align 4
197 %add = add nsw i32 %3, %4
198 %5 = load i32** @PA, align 4
199 %6 = load i32* %i, align 4
200 %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
201 store i32 %add, i32* %add.ptr1, align 4
; i += 1.
204 for.inc: ; preds = %for.body
205 %7 = load i32* %i, align 4
206 %inc = add nsw i32 %7, 1
207 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + %a).
210 for.end: ; preds = %for.cond
211 %8 = load i32** @PA, align 4
212 %9 = load i32* %a.addr, align 4
213 %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
214 %10 = load i32* %add.ptr2, align 4
218 ; /// Different objects, positive induction, multi-array
219 ; int noAlias05 (int a) {
221 ; for (i=0; i<SIZE; i++)
222 ; Bar.A[N][i] = Bar.B[N][i] + a;
223 ; return Bar.A[N][a];
225 ; CHECK: define i32 @noAlias05
226 ; CHECK: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Bar at [N][i] = field 2 of @Bar at [N][i] + %a, with N = 10.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
229 define i32 @noAlias05(i32 %a) #0 {
; %a, the loop counter i and N live in stack slots; N = 10, i starts at 0.
231 %a.addr = alloca i32, align 4
232 %i = alloca i32, align 4
233 %N = alloca i32, align 4
234 store i32 %a, i32* %a.addr, align 4
235 store i32 10, i32* %N, align 4
236 store i32 0, i32* %i, align 4
; Loop test: i < 100.
239 for.cond: ; preds = %for.inc, %entry
240 %0 = load i32* %i, align 4
241 %cmp = icmp slt i32 %0, 100
242 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [N][i], add %a, write field 0 at [N][i].
244 for.body: ; preds = %for.cond
245 %1 = load i32* %i, align 4
246 %2 = load i32* %N, align 4
247 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
248 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
249 %3 = load i32* %arrayidx1, align 4
250 %4 = load i32* %a.addr, align 4
251 %add = add nsw i32 %3, %4
252 %5 = load i32* %i, align 4
253 %6 = load i32* %N, align 4
254 %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
255 %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
256 store i32 %add, i32* %arrayidx3, align 4
; i += 1.
259 for.inc: ; preds = %for.body
260 %7 = load i32* %i, align 4
261 %inc = add nsw i32 %7, 1
262 store i32 %inc, i32* %i, align 4
; After the loop: load field 0 of @Bar at [N][%a].
265 for.end: ; preds = %for.cond
266 %8 = load i32* %a.addr, align 4
267 %9 = load i32* %N, align 4
268 %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
269 %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
270 %10 = load i32* %arrayidx5, align 4
274 ; /// Same objects, positive induction, multi-array, different sub-elements
275 ; int noAlias06 (int a) {
277 ; for (i=0; i<SIZE; i++)
278 ; Bar.A[N][i] = Bar.A[N+1][i] + a;
279 ; return Bar.A[N][a];
281 ; CHECK: define i32 @noAlias06
282 ; CHECK: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Bar at [N][i] = field 0 of @Bar at [N + 1][i] + %a, with N = 10.
; Source and destination are different rows of the same matrix.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
285 define i32 @noAlias06(i32 %a) #0 {
; %a, the loop counter i and N live in stack slots; N = 10, i starts at 0.
287 %a.addr = alloca i32, align 4
288 %i = alloca i32, align 4
289 %N = alloca i32, align 4
290 store i32 %a, i32* %a.addr, align 4
291 store i32 10, i32* %N, align 4
292 store i32 0, i32* %i, align 4
; Loop test: i < 100.
295 for.cond: ; preds = %for.inc, %entry
296 %0 = load i32* %i, align 4
297 %cmp = icmp slt i32 %0, 100
298 br i1 %cmp, label %for.body, label %for.end
; Read field 0 at [N + 1][i], add %a, write field 0 at [N][i].
300 for.body: ; preds = %for.cond
301 %1 = load i32* %i, align 4
302 %2 = load i32* %N, align 4
303 %add = add nsw i32 %2, 1
304 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
305 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
306 %3 = load i32* %arrayidx1, align 4
307 %4 = load i32* %a.addr, align 4
308 %add2 = add nsw i32 %3, %4
309 %5 = load i32* %i, align 4
310 %6 = load i32* %N, align 4
311 %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
312 %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
313 store i32 %add2, i32* %arrayidx4, align 4
; i += 1.
316 for.inc: ; preds = %for.body
317 %7 = load i32* %i, align 4
318 %inc = add nsw i32 %7, 1
319 store i32 %inc, i32* %i, align 4
; After the loop: load field 0 of @Bar at [N][%a].
322 for.end: ; preds = %for.cond
323 %8 = load i32* %a.addr, align 4
324 %9 = load i32* %N, align 4
325 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
326 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
327 %10 = load i32* %arrayidx6, align 4
331 ; /// Different objects, negative induction, constant distance
332 ; int noAlias07 (int a) {
334 ; for (i=0; i<SIZE; i++)
335 ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
338 ; CHECK: define i32 @noAlias07
339 ; CHECK: sub nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [100 - i - 1] = field 2 of @Foo at [100 - i - 1] + %a.
; Both accesses walk their arrays from the last element down.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
342 define i32 @noAlias07(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
344 %a.addr = alloca i32, align 4
345 %i = alloca i32, align 4
346 store i32 %a, i32* %a.addr, align 4
347 store i32 0, i32* %i, align 4
; Loop test: i < 100.
350 for.cond: ; preds = %for.inc, %entry
351 %0 = load i32* %i, align 4
352 %cmp = icmp slt i32 %0, 100
353 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 1], add %a, write field 0 at [100 - i - 1].
355 for.body: ; preds = %for.cond
356 %1 = load i32* %i, align 4
357 %sub = sub nsw i32 100, %1
358 %sub1 = sub nsw i32 %sub, 1
359 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
360 %2 = load i32* %arrayidx, align 4
361 %3 = load i32* %a.addr, align 4
362 %add = add nsw i32 %2, %3
363 %4 = load i32* %i, align 4
364 %sub2 = sub nsw i32 100, %4
365 %sub3 = sub nsw i32 %sub2, 1
366 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
367 store i32 %add, i32* %arrayidx4, align 4
; i += 1.
370 for.inc: ; preds = %for.body
371 %5 = load i32* %i, align 4
372 %inc = add nsw i32 %5, 1
373 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
376 for.end: ; preds = %for.cond
377 %6 = load i32* %a.addr, align 4
378 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
379 %7 = load i32* %arrayidx5, align 4
383 ; /// Different objects, negative induction, shortening slide
384 ; int noAlias08 (int a) {
386 ; for (i=0; i<SIZE-10; i++)
387 ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
390 ; CHECK: define i32 @noAlias08
391 ; CHECK: sub nsw <4 x i32>
; For i in [0, 90): field 0 of @Foo at [100 - i - 1] = field 2 of @Foo at [100 - i - 10] + %a.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
394 define i32 @noAlias08(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
396 %a.addr = alloca i32, align 4
397 %i = alloca i32, align 4
398 store i32 %a, i32* %a.addr, align 4
399 store i32 0, i32* %i, align 4
; Loop test: i < 90, so the read index 100 - i - 10 stays positive.
402 for.cond: ; preds = %for.inc, %entry
403 %0 = load i32* %i, align 4
404 %cmp = icmp slt i32 %0, 90
405 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 10], add %a, write field 0 at [100 - i - 1].
407 for.body: ; preds = %for.cond
408 %1 = load i32* %i, align 4
409 %sub = sub nsw i32 100, %1
410 %sub1 = sub nsw i32 %sub, 10
411 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
412 %2 = load i32* %arrayidx, align 4
413 %3 = load i32* %a.addr, align 4
414 %add = add nsw i32 %2, %3
415 %4 = load i32* %i, align 4
416 %sub2 = sub nsw i32 100, %4
417 %sub3 = sub nsw i32 %sub2, 1
418 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
419 store i32 %add, i32* %arrayidx4, align 4
; i += 1.
422 for.inc: ; preds = %for.body
423 %5 = load i32* %i, align 4
424 %inc = add nsw i32 %5, 1
425 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
428 for.end: ; preds = %for.cond
429 %6 = load i32* %a.addr, align 4
430 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
431 %7 = load i32* %arrayidx5, align 4
435 ; /// Different objects, negative induction, widening slide
436 ; int noAlias09 (int a) {
438 ; for (i=0; i<SIZE; i++)
439 ; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
442 ; CHECK: define i32 @noAlias09
443 ; CHECK: sub nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [100 - i - 10] = field 2 of @Foo at [100 - i - 1] + %a.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
446 define i32 @noAlias09(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
448 %a.addr = alloca i32, align 4
449 %i = alloca i32, align 4
450 store i32 %a, i32* %a.addr, align 4
451 store i32 0, i32* %i, align 4
; Loop test: i < 100.
454 for.cond: ; preds = %for.inc, %entry
455 %0 = load i32* %i, align 4
456 %cmp = icmp slt i32 %0, 100
457 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 1], add %a, write field 0 at [100 - i - 10].
459 for.body: ; preds = %for.cond
460 %1 = load i32* %i, align 4
461 %sub = sub nsw i32 100, %1
462 %sub1 = sub nsw i32 %sub, 1
463 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
464 %2 = load i32* %arrayidx, align 4
465 %3 = load i32* %a.addr, align 4
466 %add = add nsw i32 %2, %3
467 %4 = load i32* %i, align 4
468 %sub2 = sub nsw i32 100, %4
469 %sub3 = sub nsw i32 %sub2, 10
470 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
471 store i32 %add, i32* %arrayidx4, align 4
; i += 1.
474 for.inc: ; preds = %for.body
475 %5 = load i32* %i, align 4
476 %inc = add nsw i32 %5, 1
477 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
480 for.end: ; preds = %for.cond
481 %6 = load i32* %a.addr, align 4
482 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
483 %7 = load i32* %arrayidx5, align 4
487 ; /// Pointer access, negative stride, run-time check added
488 ; int noAlias10 (int a) {
490 ; for (i=0; i<SIZE; i++)
491 ; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
494 ; CHECK: define i32 @noAlias10
495 ; CHECK-NOT: sub nsw <4 x i32>
498 ; TODO: This test vectorizes (with run-time check) on real targets with -O3.
499 ; Check why it's not being vectorized even when forcing vectorization.
; For i in [0, 100): *(PA + 100 - i - 1) = *(PB + 100 - i - 1) + %a, with PA/PB
; reloaded from the @PA/@PB globals on every iteration.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
501 define i32 @noAlias10(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
503 %a.addr = alloca i32, align 4
504 %i = alloca i32, align 4
505 store i32 %a, i32* %a.addr, align 4
506 store i32 0, i32* %i, align 4
; Loop test: i < 100.
509 for.cond: ; preds = %for.inc, %entry
510 %0 = load i32* %i, align 4
511 %cmp = icmp slt i32 %0, 100
512 br i1 %cmp, label %for.body, label %for.end
; Each address is built in three steps: base + 100, then - i, then - 1.
514 for.body: ; preds = %for.cond
515 %1 = load i32** @PB, align 4
516 %add.ptr = getelementptr inbounds i32* %1, i32 100
517 %2 = load i32* %i, align 4
518 %idx.neg = sub i32 0, %2
519 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
520 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
521 %3 = load i32* %add.ptr2, align 4
522 %4 = load i32* %a.addr, align 4
523 %add = add nsw i32 %3, %4
524 %5 = load i32** @PA, align 4
525 %add.ptr3 = getelementptr inbounds i32* %5, i32 100
526 %6 = load i32* %i, align 4
527 %idx.neg4 = sub i32 0, %6
528 %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
529 %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
530 store i32 %add, i32* %add.ptr6, align 4
; i += 1.
533 for.inc: ; preds = %for.body
534 %7 = load i32* %i, align 4
535 %inc = add nsw i32 %7, 1
536 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + %a).
539 for.end: ; preds = %for.cond
540 %8 = load i32** @PA, align 4
541 %9 = load i32* %a.addr, align 4
542 %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
543 %10 = load i32* %add.ptr7, align 4
547 ; /// Different objects, negative induction, multi-array
548 ; int noAlias11 (int a) {
550 ; for (i=0; i<SIZE; i++)
551 ; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
552 ; return Bar.A[N][a];
554 ; CHECK: define i32 @noAlias11
555 ; CHECK: sub nsw <4 x i32>
; For i in [0, 100): field 0 of @Bar at [N][100 - i - 1] = field 2 of @Bar at [N][100 - i - 1] + %a,
; with N = 10.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
558 define i32 @noAlias11(i32 %a) #0 {
; %a, the loop counter i and N live in stack slots; N = 10, i starts at 0.
560 %a.addr = alloca i32, align 4
561 %i = alloca i32, align 4
562 %N = alloca i32, align 4
563 store i32 %a, i32* %a.addr, align 4
564 store i32 10, i32* %N, align 4
565 store i32 0, i32* %i, align 4
; Loop test: i < 100.
568 for.cond: ; preds = %for.inc, %entry
569 %0 = load i32* %i, align 4
570 %cmp = icmp slt i32 %0, 100
571 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [N][100 - i - 1], add %a, write field 0 at [N][100 - i - 1].
573 for.body: ; preds = %for.cond
574 %1 = load i32* %i, align 4
575 %sub = sub nsw i32 100, %1
576 %sub1 = sub nsw i32 %sub, 1
577 %2 = load i32* %N, align 4
578 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
579 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
580 %3 = load i32* %arrayidx2, align 4
581 %4 = load i32* %a.addr, align 4
582 %add = add nsw i32 %3, %4
583 %5 = load i32* %i, align 4
584 %sub3 = sub nsw i32 100, %5
585 %sub4 = sub nsw i32 %sub3, 1
586 %6 = load i32* %N, align 4
587 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
588 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
589 store i32 %add, i32* %arrayidx6, align 4
; i += 1.
592 for.inc: ; preds = %for.body
593 %7 = load i32* %i, align 4
594 %inc = add nsw i32 %7, 1
595 store i32 %inc, i32* %i, align 4
; After the loop: load field 0 of @Bar at [N][%a].
598 for.end: ; preds = %for.cond
599 %8 = load i32* %a.addr, align 4
600 %9 = load i32* %N, align 4
601 %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
602 %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
603 %10 = load i32* %arrayidx8, align 4
607 ; /// Same objects, negative induction, multi-array, different sub-elements
608 ; int noAlias12 (int a) {
610 ; for (i=0; i<SIZE; i++)
611 ; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
612 ; return Bar.A[N][a];
614 ; CHECK: define i32 @noAlias12
615 ; CHECK: sub nsw <4 x i32>
; For i in [0, 100): field 0 of @Bar at [N][100 - i - 1] = field 0 of @Bar at [N + 1][100 - i - 1] + %a,
; with N = 10. Source and destination are different rows of the same matrix.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
618 define i32 @noAlias12(i32 %a) #0 {
; %a, the loop counter i and N live in stack slots; N = 10, i starts at 0.
620 %a.addr = alloca i32, align 4
621 %i = alloca i32, align 4
622 %N = alloca i32, align 4
623 store i32 %a, i32* %a.addr, align 4
624 store i32 10, i32* %N, align 4
625 store i32 0, i32* %i, align 4
; Loop test: i < 100.
628 for.cond: ; preds = %for.inc, %entry
629 %0 = load i32* %i, align 4
630 %cmp = icmp slt i32 %0, 100
631 br i1 %cmp, label %for.body, label %for.end
; Read field 0 at [N + 1][100 - i - 1], add %a, write field 0 at [N][100 - i - 1].
633 for.body: ; preds = %for.cond
634 %1 = load i32* %i, align 4
635 %sub = sub nsw i32 100, %1
636 %sub1 = sub nsw i32 %sub, 1
637 %2 = load i32* %N, align 4
638 %add = add nsw i32 %2, 1
639 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
640 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
641 %3 = load i32* %arrayidx2, align 4
642 %4 = load i32* %a.addr, align 4
643 %add3 = add nsw i32 %3, %4
644 %5 = load i32* %i, align 4
645 %sub4 = sub nsw i32 100, %5
646 %sub5 = sub nsw i32 %sub4, 1
647 %6 = load i32* %N, align 4
648 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
649 %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
650 store i32 %add3, i32* %arrayidx7, align 4
; i += 1.
653 for.inc: ; preds = %for.body
654 %7 = load i32* %i, align 4
655 %inc = add nsw i32 %7, 1
656 store i32 %inc, i32* %i, align 4
; After the loop: load field 0 of @Bar at [N][%a].
659 for.end: ; preds = %for.cond
660 %8 = load i32* %a.addr, align 4
661 %9 = load i32* %N, align 4
662 %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
663 %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
664 %10 = load i32* %arrayidx9, align 4
668 ; /// Same objects, positive induction, constant distance, just enough for vector size
669 ; int noAlias13 (int a) {
671 ; for (i=0; i<SIZE; i++)
672 ; Foo.A[i] = Foo.A[i+4] + a;
675 ; CHECK: define i32 @noAlias13
676 ; CHECK: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i] = field 0 of @Foo at [i + 4] + %a.
; Same array on both sides; the read runs 4 elements ahead of the write, which
; equals the vector width forced on the RUN line.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
679 define i32 @noAlias13(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
681 %a.addr = alloca i32, align 4
682 %i = alloca i32, align 4
683 store i32 %a, i32* %a.addr, align 4
684 store i32 0, i32* %i, align 4
; Loop test: i < 100.
687 for.cond: ; preds = %for.inc, %entry
688 %0 = load i32* %i, align 4
689 %cmp = icmp slt i32 %0, 100
690 br i1 %cmp, label %for.body, label %for.end
; Read field 0 at [i + 4], add %a, write field 0 at [i].
692 for.body: ; preds = %for.cond
693 %1 = load i32* %i, align 4
694 %add = add nsw i32 %1, 4
695 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
696 %2 = load i32* %arrayidx, align 4
697 %3 = load i32* %a.addr, align 4
698 %add1 = add nsw i32 %2, %3
699 %4 = load i32* %i, align 4
700 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
701 store i32 %add1, i32* %arrayidx2, align 4
; i += 1.
704 for.inc: ; preds = %for.body
705 %5 = load i32* %i, align 4
706 %inc = add nsw i32 %5, 1
707 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
710 for.end: ; preds = %for.cond
711 %6 = load i32* %a.addr, align 4
712 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
713 %7 = load i32* %arrayidx3, align 4
717 ; /// Same objects, negative induction, constant distance, just enough for vector size
718 ; int noAlias14 (int a) {
720 ; for (i=0; i<SIZE; i++)
721 ; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
724 ; CHECK: define i32 @noAlias14
725 ; CHECK: sub nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [100 - i - 1] = field 0 of @Foo at [100 - i - 5] + %a.
; Same array on both sides; the read index is 4 below the write index, which
; equals the vector width forced on the RUN line.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
728 define i32 @noAlias14(i32 %a) #0 {
; %a and the loop counter i live in stack slots; i starts at 0.
730 %a.addr = alloca i32, align 4
731 %i = alloca i32, align 4
732 store i32 %a, i32* %a.addr, align 4
733 store i32 0, i32* %i, align 4
; Loop test: i < 100.
736 for.cond: ; preds = %for.inc, %entry
737 %0 = load i32* %i, align 4
738 %cmp = icmp slt i32 %0, 100
739 br i1 %cmp, label %for.body, label %for.end
; Read field 0 at [100 - i - 5], add %a, write field 0 at [100 - i - 1].
741 for.body: ; preds = %for.cond
742 %1 = load i32* %i, align 4
743 %sub = sub nsw i32 100, %1
744 %sub1 = sub nsw i32 %sub, 5
745 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
746 %2 = load i32* %arrayidx, align 4
747 %3 = load i32* %a.addr, align 4
748 %add = add nsw i32 %2, %3
749 %4 = load i32* %i, align 4
750 %sub2 = sub nsw i32 100, %4
751 %sub3 = sub nsw i32 %sub2, 1
752 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
753 store i32 %add, i32* %arrayidx4, align 4
; i += 1.
756 for.inc: ; preds = %for.body
757 %5 = load i32* %i, align 4
758 %inc = add nsw i32 %5, 1
759 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
762 for.end: ; preds = %for.cond
763 %6 = load i32* %a.addr, align 4
764 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
765 %7 = load i32* %arrayidx5, align 4
770 ;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
773 ; /// Different objects, swapped induction, alias at the end
774 ; int mayAlias01 (int a) {
776 ; for (i=0; i<SIZE; i++)
777 ; Foo.A[i] = Foo.B[SIZE-i-1] + a;
780 ; CHECK: define i32 @mayAlias01
781 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i] = field 2 of @Foo at [100 - i - 1] + %a.
; The read walks downward while the write walks upward.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
784 define i32 @mayAlias01(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
786 %a.addr = alloca i32, align 4
787 %i = alloca i32, align 4
788 store i32 %a, i32* %a.addr, align 4
789 store i32 0, i32* %i, align 4
; Loop test: i < 100.
792 for.cond: ; preds = %for.inc, %entry
793 %0 = load i32* %i, align 4
794 %cmp = icmp slt i32 %0, 100
795 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 1], add %a, write field 0 at [i].
797 for.body: ; preds = %for.cond
798 %1 = load i32* %i, align 4
799 %sub = sub nsw i32 100, %1
800 %sub1 = sub nsw i32 %sub, 1
801 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
802 %2 = load i32* %arrayidx, align 4
803 %3 = load i32* %a.addr, align 4
804 %add = add nsw i32 %2, %3
805 %4 = load i32* %i, align 4
806 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
807 store i32 %add, i32* %arrayidx2, align 4
; i += 1.
810 for.inc: ; preds = %for.body
811 %5 = load i32* %i, align 4
812 %inc = add nsw i32 %5, 1
813 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
816 for.end: ; preds = %for.cond
817 %6 = load i32* %a.addr, align 4
818 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
819 %7 = load i32* %arrayidx3, align 4
823 ; /// Different objects, swapped induction, alias at the beginning
824 ; int mayAlias02 (int a) {
826 ; for (i=0; i<SIZE; i++)
827 ; Foo.A[SIZE-i-1] = Foo.B[i] + a;
830 ; CHECK: define i32 @mayAlias02
831 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [100 - i - 1] = field 2 of @Foo at [i] + %a.
; The read walks upward while the write walks downward.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
834 define i32 @mayAlias02(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
836 %a.addr = alloca i32, align 4
837 %i = alloca i32, align 4
838 store i32 %a, i32* %a.addr, align 4
839 store i32 0, i32* %i, align 4
; Loop test: i < 100.
842 for.cond: ; preds = %for.inc, %entry
843 %0 = load i32* %i, align 4
844 %cmp = icmp slt i32 %0, 100
845 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [i], add %a, write field 0 at [100 - i - 1].
847 for.body: ; preds = %for.cond
848 %1 = load i32* %i, align 4
849 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
850 %2 = load i32* %arrayidx, align 4
851 %3 = load i32* %a.addr, align 4
852 %add = add nsw i32 %2, %3
853 %4 = load i32* %i, align 4
854 %sub = sub nsw i32 100, %4
855 %sub1 = sub nsw i32 %sub, 1
856 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
857 store i32 %add, i32* %arrayidx2, align 4
; i += 1.
860 for.inc: ; preds = %for.body
861 %5 = load i32* %i, align 4
862 %inc = add nsw i32 %5, 1
863 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
866 for.end: ; preds = %for.cond
867 %6 = load i32* %a.addr, align 4
868 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
869 %7 = load i32* %arrayidx3, align 4
873 ; /// Pointer access, run-time check added
874 ; int mayAlias03 (int a) {
876 ; for (i=0; i<SIZE; i++)
877 ; *(PA+i) = *(PB+SIZE-i-1) + a;
880 ; CHECK: define i32 @mayAlias03
881 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): *(PA + i) = *(PB + 100 - i - 1) + %a, with PA/PB reloaded
; from the @PA/@PB globals on every iteration.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
884 define i32 @mayAlias03(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
886 %a.addr = alloca i32, align 4
887 %i = alloca i32, align 4
888 store i32 %a, i32* %a.addr, align 4
889 store i32 0, i32* %i, align 4
; Loop test: i < 100.
892 for.cond: ; preds = %for.inc, %entry
893 %0 = load i32* %i, align 4
894 %cmp = icmp slt i32 %0, 100
895 br i1 %cmp, label %for.body, label %for.end
; The read address is built as PB + 100, then - i, then - 1; the write address is PA + i.
897 for.body: ; preds = %for.cond
898 %1 = load i32** @PB, align 4
899 %add.ptr = getelementptr inbounds i32* %1, i32 100
900 %2 = load i32* %i, align 4
901 %idx.neg = sub i32 0, %2
902 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
903 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
904 %3 = load i32* %add.ptr2, align 4
905 %4 = load i32* %a.addr, align 4
906 %add = add nsw i32 %3, %4
907 %5 = load i32** @PA, align 4
908 %6 = load i32* %i, align 4
909 %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
910 store i32 %add, i32* %add.ptr3, align 4
; i += 1.
913 for.inc: ; preds = %for.body
914 %7 = load i32* %i, align 4
915 %inc = add nsw i32 %7, 1
916 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + %a).
919 for.end: ; preds = %for.cond
920 %8 = load i32** @PA, align 4
921 %9 = load i32* %a.addr, align 4
922 %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
923 %10 = load i32* %add.ptr4, align 4
928 ;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour altogether) ===
931 ; int mustAlias01 (int a) {
933 ; for (i=0; i<SIZE; i++)
934 ; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
937 ; CHECK: define i32 @mustAlias01
938 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i + 10] = field 2 of @Foo at [100 - i - 1] + %a.
; The write index i + 10 reaches 109, past the 100 elements of field 0.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
941 define i32 @mustAlias01(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
943 %a.addr = alloca i32, align 4
944 %i = alloca i32, align 4
945 store i32 %a, i32* %a.addr, align 4
946 store i32 0, i32* %i, align 4
; Loop test: i < 100.
949 for.cond: ; preds = %for.inc, %entry
950 %0 = load i32* %i, align 4
951 %cmp = icmp slt i32 %0, 100
952 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 1], add %a, write field 0 at [i + 10].
954 for.body: ; preds = %for.cond
955 %1 = load i32* %i, align 4
956 %sub = sub nsw i32 100, %1
957 %sub1 = sub nsw i32 %sub, 1
958 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
959 %2 = load i32* %arrayidx, align 4
960 %3 = load i32* %a.addr, align 4
961 %add = add nsw i32 %2, %3
962 %4 = load i32* %i, align 4
963 %add2 = add nsw i32 %4, 10
964 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
965 store i32 %add, i32* %arrayidx3, align 4
; i += 1.
968 for.inc: ; preds = %for.body
969 %5 = load i32* %i, align 4
970 %inc = add nsw i32 %5, 1
971 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
974 for.end: ; preds = %for.cond
975 %6 = load i32* %a.addr, align 4
976 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
977 %7 = load i32* %arrayidx4, align 4
981 ; int mustAlias02 (int a) {
983 ; for (i=0; i<SIZE; i++)
984 ; Foo.A[i] = Foo.B[SIZE-i-10] + a;
987 ; CHECK: define i32 @mustAlias02
988 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i] = field 2 of @Foo at [100 - i - 10] + %a.
; The read index 100 - i - 10 goes negative once i exceeds 90.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
991 define i32 @mustAlias02(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
993 %a.addr = alloca i32, align 4
994 %i = alloca i32, align 4
995 store i32 %a, i32* %a.addr, align 4
996 store i32 0, i32* %i, align 4
; Loop test: i < 100.
999 for.cond: ; preds = %for.inc, %entry
1000 %0 = load i32* %i, align 4
1001 %cmp = icmp slt i32 %0, 100
1002 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 10], add %a, write field 0 at [i].
1004 for.body: ; preds = %for.cond
1005 %1 = load i32* %i, align 4
1006 %sub = sub nsw i32 100, %1
1007 %sub1 = sub nsw i32 %sub, 10
1008 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1009 %2 = load i32* %arrayidx, align 4
1010 %3 = load i32* %a.addr, align 4
1011 %add = add nsw i32 %2, %3
1012 %4 = load i32* %i, align 4
1013 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
1014 store i32 %add, i32* %arrayidx2, align 4
; i += 1.
1017 for.inc: ; preds = %for.body
1018 %5 = load i32* %i, align 4
1019 %inc = add nsw i32 %5, 1
1020 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
1023 for.end: ; preds = %for.cond
1024 %6 = load i32* %a.addr, align 4
1025 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1026 %7 = load i32* %arrayidx3, align 4
1030 ; int mustAlias03 (int a) {
1032 ; for (i=0; i<SIZE; i++)
1033 ; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
1036 ; CHECK: define i32 @mustAlias03
1037 ; CHECK-NOT: add nsw <4 x i32>
; For i in [0, 100): field 0 of @Foo at [i + 10] = field 2 of @Foo at [100 - i - 10] + %a.
; The write index reaches 109 and the read index goes negative once i exceeds 90.
; NOTE(review): no entry label, br/ret terminators or closing brace are visible
; in this view of the function -- confirm against the complete file.
1040 define i32 @mustAlias03(i32 %a) nounwind {
; %a and the loop counter i live in stack slots; i starts at 0.
1042 %a.addr = alloca i32, align 4
1043 %i = alloca i32, align 4
1044 store i32 %a, i32* %a.addr, align 4
1045 store i32 0, i32* %i, align 4
; Loop test: i < 100.
1048 for.cond: ; preds = %for.inc, %entry
1049 %0 = load i32* %i, align 4
1050 %cmp = icmp slt i32 %0, 100
1051 br i1 %cmp, label %for.body, label %for.end
; Read field 2 at [100 - i - 10], add %a, write field 0 at [i + 10].
1053 for.body: ; preds = %for.cond
1054 %1 = load i32* %i, align 4
1055 %sub = sub nsw i32 100, %1
1056 %sub1 = sub nsw i32 %sub, 10
1057 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1058 %2 = load i32* %arrayidx, align 4
1059 %3 = load i32* %a.addr, align 4
1060 %add = add nsw i32 %2, %3
1061 %4 = load i32* %i, align 4
1062 %add2 = add nsw i32 %4, 10
1063 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
1064 store i32 %add, i32* %arrayidx3, align 4
; i += 1.
1067 for.inc: ; preds = %for.body
1068 %5 = load i32* %i, align 4
1069 %inc = add nsw i32 %5, 1
1070 store i32 %inc, i32* %i, align 4
; After the loop: load element %a of field 0 of @Foo.
1073 for.end: ; preds = %for.cond
1074 %6 = load i32* %a.addr, align 4
1075 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1076 %7 = load i32* %arrayidx4, align 4