; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
; RUN:          -verify-machineinstrs | FileCheck %s
;
; Verify that misched resource/latency balance heuristics are sane.
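;
; (Explanatory note: -pre-RA-sched=source keeps the SelectionDAG scheduler in
; source order, so the schedule verified below is produced by the
; MachineScheduler enabled with -enable-misched.)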
define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body
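
; Each iteration of the loop below is a fully unrolled ten-term dot product:
;   tmp55[i] = tmp56[0]*pre[i] + tmp56[1]*pre94[i] + ... + tmp56[9]*pre102[i]
; for i = 0..9. (%pre103 and %pre104 are unused in the body.)
;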
; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since mmult1 IR is already in good order, this effectively ensures that
; the scheduler maintains source order.
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
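;
; For illustration only (hypothetical registers and offsets, not a FileCheck
; pattern): a good schedule alternates the folded-load multiplies with the
; adds that accumulate them, e.g.
;   imull 4(%rsi), %ecx
;   addl  %ecx, %eax
;   imull 8(%rsi), %edx
;   addl  %edx, %eax
; rather than issuing the imulls back to back, which would contend for the
; multiplier while the adds wait on the critical path.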
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}
; Unlike the above loop, this IR starts out bad and must be
; rescheduled.
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
; CHECK-NOT: {{imull|rsp}}
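;
; Descriptive note on the IR below: it computes the same ten-term dot product
; as @unrolled_mmult1, but with the loads grouped ahead of the mul/add chain
; instead of interleaved with it, so the scheduler must recover the
; interleaving on its own.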
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}