1 ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
3 ; @sharedidx is an unrolled variant of this loop:
4 ; for (unsigned long i = 0; i < len; i += s) { c[i] = a[i] + b[i]; }
7 ; where 's' cannot be folded into the addressing mode.
9 ; This is not quite profitable to chain. But with -stress-ivchain, we
10 ; can form three address chains in place of the shared induction variable.
14 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
17 %cmp8 = icmp eq i32 %len, 0
18 br i1 %cmp8, label %for.end, label %for.body
20 for.body: ; preds = %entry, %for.body.3
22 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
23 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
24 %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
25 %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
26 %0 = load i8* %arrayidx, align 1
27 %conv6 = zext i8 %0 to i32
28 %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
29 %1 = load i8* %arrayidx1, align 1
30 %conv27 = zext i8 %1 to i32
31 %add = add nsw i32 %conv27, %conv6
32 %conv3 = trunc i32 %add to i8
33 %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
34 store i8 %conv3, i8* %arrayidx4, align 1
35 %add5 = add i32 %i.09, %s
36 %cmp = icmp ult i32 %add5, %len
37 br i1 %cmp, label %for.body.1, label %for.end
39 for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
42 for.body.1: ; preds = %for.body
44 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
45 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
46 %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
47 %2 = load i8* %arrayidx.1, align 1
48 %conv6.1 = zext i8 %2 to i32
49 %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
50 %3 = load i8* %arrayidx1.1, align 1
51 %conv27.1 = zext i8 %3 to i32
52 %add.1 = add nsw i32 %conv27.1, %conv6.1
53 %conv3.1 = trunc i32 %add.1 to i8
54 %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
55 store i8 %conv3.1, i8* %arrayidx4.1, align 1
56 %add5.1 = add i32 %add5, %s
57 %cmp.1 = icmp ult i32 %add5.1, %len
58 br i1 %cmp.1, label %for.body.2, label %for.end
60 for.body.2: ; preds = %for.body.1
62 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
63 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
64 %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
65 %4 = load i8* %arrayidx.2, align 1
66 %conv6.2 = zext i8 %4 to i32
67 %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
68 %5 = load i8* %arrayidx1.2, align 1
69 %conv27.2 = zext i8 %5 to i32
70 %add.2 = add nsw i32 %conv27.2, %conv6.2
71 %conv3.2 = trunc i32 %add.2 to i8
72 %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
73 store i8 %conv3.2, i8* %arrayidx4.2, align 1
74 %add5.2 = add i32 %add5.1, %s
75 %cmp.2 = icmp ult i32 %add5.2, %len
76 br i1 %cmp.2, label %for.body.3, label %for.end
78 for.body.3: ; preds = %for.body.2
80 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
81 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
82 %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
83 %6 = load i8* %arrayidx.3, align 1
84 %conv6.3 = zext i8 %6 to i32
85 %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
86 %7 = load i8* %arrayidx1.3, align 1
87 %conv27.3 = zext i8 %7 to i32
88 %add.3 = add nsw i32 %conv27.3, %conv6.3
89 %conv3.3 = trunc i32 %add.3 to i8
90 %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
91 store i8 %conv3.3, i8* %arrayidx4.3, align 1
92 %add5.3 = add i32 %add5.2, %s
93 %cmp.3 = icmp ult i32 %add5.3, %len
94 br i1 %cmp.3, label %for.body, label %for.end