1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
2 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5 target triple = "x86_64-apple-macosx10.8.0"
7 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
8 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
10 ; CHECK: merge_const_store
11 ; save 1,2,3 ... as one big integer.
12 ; CHECK: movabsq $578437695752307201
; Test input: every loop iteration writes the byte constants 1..8 into the
; eight consecutive i8 fields of one %struct.A and then steps to the next
; struct. The expectation preceding this function is a single 64-bit immediate,
; 578437695752307201 == 0x0807060504030201 (bytes 1..8, lowest address first).
14 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
15 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
16 br i1 %1, label %.lr.ph, label %._crit_edge
18 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
19 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct, starts at %p
20 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
21 store i8 1, i8* %2, align 1
22 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
23 store i8 2, i8* %3, align 1
24 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
25 store i8 3, i8* %4, align 1
26 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
27 store i8 4, i8* %5, align 1
28 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
29 store i8 5, i8* %6, align 1
30 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
31 store i8 6, i8* %7, align 1
32 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
33 store i8 7, i8* %8, align 1
34 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
35 store i8 8, i8* %9, align 1
36 %10 = add nsw i32 %i.02, 1 ; counter for the next iteration
37 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance by one whole %struct.A
38 %exitcond = icmp eq i32 %10, %count
39 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; exit once the counter reaches count
44 ; No vectors because we use noimplicitfloat
45 ; CHECK: merge_const_store_no_vec
; Test input: every loop iteration stores i32 0 into the eight consecutive
; fields of one %struct.B. The function carries the noimplicitfloat attribute,
; which the comment preceding it gives as the reason no vector store is used.
48 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
49 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
50 br i1 %1, label %.lr.ph, label %._crit_edge
52 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
53 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct, starts at %p
54 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
55 store i32 0, i32* %2, align 4
56 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
57 store i32 0, i32* %3, align 4
58 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
59 store i32 0, i32* %4, align 4
60 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
61 store i32 0, i32* %5, align 4
62 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
63 store i32 0, i32* %6, align 4
64 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
65 store i32 0, i32* %7, align 4
66 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
67 store i32 0, i32* %8, align 4
68 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
69 store i32 0, i32* %9, align 4
70 %10 = add nsw i32 %i.02, 1 ; counter for the next iteration
71 %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance by one whole %struct.B
72 %exitcond = icmp eq i32 %10, %count
73 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; exit once the counter reaches count
78 ; Move the constants using a single vector store.
79 ; CHECK: merge_const_store_vec
; Test input: same zero-fill of all eight i32 fields of a %struct.B per
; iteration as @merge_const_store_no_vec, but without the noimplicitfloat
; attribute on the function.
82 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
83 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
84 br i1 %1, label %.lr.ph, label %._crit_edge
86 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
87 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct, starts at %p
88 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
89 store i32 0, i32* %2, align 4
90 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
91 store i32 0, i32* %3, align 4
92 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
93 store i32 0, i32* %4, align 4
94 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
95 store i32 0, i32* %5, align 4
96 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
97 store i32 0, i32* %6, align 4
98 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
99 store i32 0, i32* %7, align 4
100 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
101 store i32 0, i32* %8, align 4
102 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
103 store i32 0, i32* %9, align 4
104 %10 = add nsw i32 %i.02, 1 ; counter for the next iteration
105 %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance by one whole %struct.B
106 %exitcond = icmp eq i32 %10, %count
107 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; exit once the counter reaches count
112 ; Move the first 4 constants as a single 32-bit integer store. Move the rest as scalars.
113 ; CHECK: merge_nonconst_store
114 ; CHECK: movl $67305985
; Test input: like @merge_const_store, except that field 4 receives the
; non-constant argument %zz, splitting the run of constant byte stores into
; fields 0..3 and fields 5..7. The expectation preceding this function is the
; 32-bit immediate 67305985 == 0x04030201 (bytes 1..4, lowest address first).
120 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
121 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
122 br i1 %1, label %.lr.ph, label %._crit_edge
124 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
125 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct, starts at %p
126 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
127 store i8 1, i8* %2, align 1
128 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
129 store i8 2, i8* %3, align 1
130 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
131 store i8 3, i8* %4, align 1
132 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
133 store i8 4, i8* %5, align 1
134 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
135 store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
136 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
137 store i8 6, i8* %7, align 1
138 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
139 store i8 7, i8* %8, align 1
140 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
141 store i8 8, i8* %9, align 1
142 %10 = add nsw i32 %i.02, 1 ; counter for the next iteration
143 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance by one whole %struct.A
144 %exitcond = icmp eq i32 %10, %count
145 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; exit once the counter reaches count
151 ;CHECK-LABEL: merge_loads_i16:
; Test input: copies two adjacent bytes (fields 0 and 1 of the struct at %q)
; into fields 0 and 1 of each successive %struct.A starting at %p. Both loads
; are issued before either store, and %q / %p are declared noalias.
157 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
158 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
159 br i1 %1, label %.lr.ph, label %._crit_edge
162 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source byte 0 (address does not change per iteration)
163 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source byte 1, adjacent to byte 0
166 ; <label>:4 ; preds = %4, %.lr.ph
167 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] ; iteration counter, starts at 0
168 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; current destination struct, starts at %p
169 %5 = load i8* %2, align 1
170 %6 = load i8* %3, align 1
171 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
172 store i8 %5, i8* %7, align 1
173 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
174 store i8 %6, i8* %8, align 1
175 %9 = add nsw i32 %i.02, 1 ; counter for the next iteration
176 %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance the destination by one struct
177 %exitcond = icmp eq i32 %9, %count
178 br i1 %exitcond, label %._crit_edge, label %4 ; exit once the counter reaches count
180 ._crit_edge: ; preds = %4, %0
184 ; The loads and the stores are interleaved. Can't merge them.
185 ;CHECK-LABEL: no_merge_loads:
; Test input: same two-byte copy as @merge_loads_i16, but the second load is
; placed after the first store, so the order is load, store, load, store.
191 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
192 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
193 br i1 %1, label %.lr.ph, label %._crit_edge
196 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source byte 0
197 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source byte 1
200 a4: ; preds = %a4, %.lr.ph
201 %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] ; iteration counter, starts at 0
202 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; current destination struct, starts at %p
203 %a5 = load i8* %2, align 1
204 %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
205 store i8 %a5, i8* %a7, align 1 ; first store happens before the second load
206 %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
207 %a6 = load i8* %3, align 1
208 store i8 %a6, i8* %a8, align 1
209 %a9 = add nsw i32 %i.02, 1 ; counter for the next iteration
210 %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance the destination by one struct
211 %exitcond = icmp eq i32 %a9, %count
212 br i1 %exitcond, label %._crit_edge, label %a4 ; exit once the counter reaches count
214 ._crit_edge: ; preds = %a4, %0
219 ;CHECK-LABEL: merge_loads_integer:
; Test input: stores two i32 values into fields 0 and 1 of each successive
; %struct.B starting at %p.
; NOTE(review): %5 and %6 are not defined in the visible lines; presumably they
; are loads from %2 and %3 (fields 0 and 1 of %q) — confirm against the full file.
225 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
226 %1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
227 br i1 %1, label %.lr.ph, label %._crit_edge
230 %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; address of source field 0
231 %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1 ; address of source field 1
234 ; <label>:4 ; preds = %4, %.lr.ph
235 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] ; iteration counter, starts at 0
236 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] ; current destination struct, starts at %p
239 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
240 store i32 %5, i32* %7
241 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
242 store i32 %6, i32* %8
243 %9 = add nsw i32 %i.02, 1 ; counter for the next iteration
244 %10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination by one struct
245 %exitcond = icmp eq i32 %9, %count
246 br i1 %exitcond, label %._crit_edge, label %4 ; exit once the counter reaches count
248 ._crit_edge: ; preds = %4, %0
253 ;CHECK-LABEL: merge_loads_vector:
; Test input: stores four i32 values into fields 0..3 of each successive
; %struct.B starting at %p; the four stores carry no explicit alignment.
; NOTE(review): %b1..%b4 are not defined in the visible lines; presumably they
; are loads from %a2..%a5 (fields 0..3 of %q) — confirm against the full file.
259 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
260 %a1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
261 br i1 %a1, label %.lr.ph, label %._crit_edge
264 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; addresses of source fields 0..3
265 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
266 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
267 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
270 block4: ; preds = %block4, %.lr.ph
271 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
272 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; current destination struct, starts at %p
273 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; addresses of destination fields 0..3
274 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
275 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
276 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
281 store i32 %b1, i32* %a7
282 store i32 %b2, i32* %a8
283 store i32 %b3, i32* %a9
284 store i32 %b4, i32* %a10
285 %c9 = add nsw i32 %i.02, 1 ; counter for the next iteration
286 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination by one struct
287 %exitcond = icmp eq i32 %c9, %count
288 br i1 %exitcond, label %._crit_edge, label %block4 ; exit once the counter reaches count
290 ._crit_edge: ; preds = %block4, %0
294 ;CHECK-LABEL: merge_loads_no_align:
; Test input: copies fields 0..3 of the struct at %q into fields 0..3 of each
; successive %struct.B starting at %p. Every load and store is marked align 1,
; and all four loads are issued before the four stores.
306 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
307 %a1 = icmp sgt i32 %count, 0 ; only enter the loop when count > 0
308 br i1 %a1, label %.lr.ph, label %._crit_edge
311 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; addresses of source fields 0..3
312 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
313 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
314 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
317 block4: ; preds = %block4, %.lr.ph
318 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
319 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; current destination struct, starts at %p
320 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; addresses of destination fields 0..3
321 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
322 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
323 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
324 %b1 = load i32* %a2, align 1
325 %b2 = load i32* %a3, align 1
326 %b3 = load i32* %a4, align 1
327 %b4 = load i32* %a5, align 1
328 store i32 %b1, i32* %a7, align 1
329 store i32 %b2, i32* %a8, align 1
330 store i32 %b3, i32* %a9, align 1
331 store i32 %b4, i32* %a10, align 1
332 %c9 = add nsw i32 %i.02, 1 ; counter for the next iteration
333 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination by one struct
334 %exitcond = icmp eq i32 %c9, %count
335 br i1 %exitcond, label %._crit_edge, label %block4 ; exit once the counter reaches count
337 ._crit_edge: ; preds = %block4, %0
341 ; Make sure that we merge the consecutive load/store sequence below and use a
342 ; word (16 bit) instead of a byte copy.
343 ; CHECK: MergeLoadStoreBaseIndexOffset
344 ; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
345 ; CHECK: movw [[REG]], (%{{.*}})
; Test input: each iteration reads an i64 index from %a, loads two bytes from
; %c at index-based addresses, and stores them to two consecutive bytes of %b.
; NOTE(review): %6 is not defined in the visible lines; presumably it is the
; index of the byte adjacent to %3 — confirm against the full file.
346 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
350 %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] ; remaining iterations, counts down from %n
351 %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] ; current destination, starts at %b
352 %.0 = phi i64* [ %a, %0 ], [ %2, %1 ] ; current index pointer, starts at %a
353 %2 = getelementptr inbounds i64* %.0, i64 1 ; next index slot
354 %3 = load i64* %.0, align 1 ; index into %c
355 %4 = getelementptr inbounds i8* %c, i64 %3
356 %5 = load i8* %4, align 1
358 %7 = getelementptr inbounds i8* %c, i64 %6
359 %8 = load i8* %7, align 1
360 store i8 %5, i8* %.08, align 1
361 %9 = getelementptr inbounds i8* %.08, i64 1 ; second destination byte, adjacent to the first
362 store i8 %8, i8* %9, align 1
363 %10 = getelementptr inbounds i8* %.08, i64 2 ; destination advances two bytes per iteration
364 %11 = add nsw i32 %.09, -1
365 %12 = icmp eq i32 %11, 0
366 br i1 %12, label %13, label %1 ; exit when the countdown reaches zero
372 ; Make sure that we merge the consecutive load/store sequence below and use a
373 ; word (16 bit) instead of a byte copy even if there are intermediate sign extensions.
375 ; CHECK: MergeLoadStoreBaseIndexOffsetSext
376 ; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
377 ; CHECK: movw [[REG]], (%{{.*}})
; Test input: each iteration reads an i8 index from %a, sign-extends it to i64,
; loads two bytes from %c at index-based addresses, and stores them to two
; consecutive bytes of %b.
; NOTE(review): %7 is not defined in the visible lines; presumably it is the
; index of the byte adjacent to %4 — confirm against the full file.
378 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
382 %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] ; remaining iterations, counts down from %n
383 %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] ; current destination, starts at %b
384 %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] ; current index pointer, starts at %a
385 %2 = getelementptr inbounds i8* %.0, i64 1 ; next index byte
386 %3 = load i8* %.0, align 1 ; narrow index into %c
387 %4 = sext i8 %3 to i64 ; widen the index before address computation
388 %5 = getelementptr inbounds i8* %c, i64 %4
389 %6 = load i8* %5, align 1
391 %8 = getelementptr inbounds i8* %c, i64 %7
392 %9 = load i8* %8, align 1
393 store i8 %6, i8* %.08, align 1
394 %10 = getelementptr inbounds i8* %.08, i64 1 ; second destination byte, adjacent to the first
395 store i8 %9, i8* %10, align 1
396 %11 = getelementptr inbounds i8* %.08, i64 2 ; destination advances two bytes per iteration
397 %12 = add nsw i32 %.09, -1
398 %13 = icmp eq i32 %12, 0
399 br i1 %13, label %14, label %1 ; exit when the countdown reaches zero
405 ; However, we can only ignore sign extensions when merging if they are on all memory computations.
407 ; CHECK: loadStoreBaseIndexOffsetSextNoSex
408 ; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
409 ; CHECK-NOT: movw [[REG]], (%{{.*}})
; Test input: variant of @MergeLoadStoreBaseIndexOffsetSext in which the second
; index is an i8 value (%7) that gets its own sign extension (%wrap.4) before
; being used to address %c. The expectations preceding this function require
; that no 16-bit (movw) load/store pair is emitted.
; NOTE(review): %7 is not defined in the visible lines; presumably it is
; derived from %3 in i8 arithmetic — confirm against the full file.
410 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
414 %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] ; remaining iterations, counts down from %n
415 %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] ; current destination, starts at %b
416 %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] ; current index pointer, starts at %a
417 %2 = getelementptr inbounds i8* %.0, i64 1 ; next index byte
418 %3 = load i8* %.0, align 1 ; narrow index into %c
419 %4 = sext i8 %3 to i64 ; first index, widened
420 %5 = getelementptr inbounds i8* %c, i64 %4
421 %6 = load i8* %5, align 1
423 %wrap.4 = sext i8 %7 to i64 ; second index, widened separately from the first
424 %8 = getelementptr inbounds i8* %c, i64 %wrap.4
425 %9 = load i8* %8, align 1
426 store i8 %6, i8* %.08, align 1
427 %10 = getelementptr inbounds i8* %.08, i64 1 ; second destination byte, adjacent to the first
428 store i8 %9, i8* %10, align 1
429 %11 = getelementptr inbounds i8* %.08, i64 2 ; destination advances two bytes per iteration
430 %12 = add nsw i32 %.09, -1
431 %13 = icmp eq i32 %12, 0
432 br i1 %13, label %14, label %1 ; exit when the countdown reaches zero