1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4 target triple = "x86_64-apple-macosx10.8.0"
6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
7 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
9 ; CHECK: merge_const_store
10 ; save 1,2,3 ... as one big integer.
11 ; CHECK: movabsq $578437695752307201
; Each loop iteration writes the byte constants 1..8 into the eight consecutive
; i8 fields of the %struct.A at %.01 and then steps to the next struct. The
; expected immediate 578437695752307201 equals 0x0807060504030201, i.e. those
; eight bytes packed into one 64-bit value on this little-endian target.
13 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
14 %1 = icmp sgt i32 %count, 0
15 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; %i.02 is the iteration counter (starts at 0); %.01 is the struct currently
; being written (starts at %p).
17 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
18 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
; Field k (k = 0..7) receives the constant k + 1; every store is byte-aligned.
19 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
20 store i8 1, i8* %2, align 1
21 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
22 store i8 2, i8* %3, align 1
23 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
24 store i8 3, i8* %4, align 1
25 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
26 store i8 4, i8* %5, align 1
27 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
28 store i8 5, i8* %6, align 1
29 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
30 store i8 6, i8* %7, align 1
31 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
32 store i8 7, i8* %8, align 1
33 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
34 store i8 8, i8* %9, align 1
35 %10 = add nsw i32 %i.02, 1
36 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; address of the next %struct.A
37 %exitcond = icmp eq i32 %10, %count
38 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave the loop once %10 == %count
43 ; No vectors because we use noimplicitfloat
44 ; CHECK: merge_const_store_no_vec
; Each loop iteration stores i32 0 into all eight fields of the %struct.B at
; %.01 and then steps to the next struct. The only attribute on this function
; is noimplicitfloat, which is the reason given above for expecting no vector
; stores here.
47 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
48 %1 = icmp sgt i32 %count, 0
49 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; %i.02 is the iteration counter (starts at 0); %.01 is the struct currently
; being zeroed (starts at %p).
51 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
52 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
; Eight 4-byte-aligned zero stores, one per field 0..7.
53 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
54 store i32 0, i32* %2, align 4
55 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
56 store i32 0, i32* %3, align 4
57 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
58 store i32 0, i32* %4, align 4
59 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
60 store i32 0, i32* %5, align 4
61 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
62 store i32 0, i32* %6, align 4
63 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
64 store i32 0, i32* %7, align 4
65 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
66 store i32 0, i32* %8, align 4
67 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
68 store i32 0, i32* %9, align 4
69 %10 = add nsw i32 %i.02, 1
70 %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; address of the next %struct.B
71 %exitcond = icmp eq i32 %10, %count
72 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave the loop once %10 == %count
77 ; Move the constants using a single vector store.
78 ; CHECK: merge_const_store_vec
; Same loop body as @merge_const_store_no_vec (eight i32 zero stores per
; %struct.B), but this function does not carry noimplicitfloat, so the zero
; stores are allowed to be combined into vector stores.
81 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
82 %1 = icmp sgt i32 %count, 0
83 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; %i.02 is the iteration counter (starts at 0); %.01 is the struct currently
; being zeroed (starts at %p).
85 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
86 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
; Eight 4-byte-aligned zero stores, one per field 0..7.
87 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
88 store i32 0, i32* %2, align 4
89 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
90 store i32 0, i32* %3, align 4
91 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
92 store i32 0, i32* %4, align 4
93 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
94 store i32 0, i32* %5, align 4
95 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
96 store i32 0, i32* %6, align 4
97 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
98 store i32 0, i32* %7, align 4
99 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
100 store i32 0, i32* %8, align 4
101 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
102 store i32 0, i32* %9, align 4
103 %10 = add nsw i32 %i.02, 1
104 %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; address of the next %struct.B
105 %exitcond = icmp eq i32 %10, %count
106 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave the loop once %10 == %count
111 ; Move the first 4 constants as a single 32-bit integer store. Move the rest as scalars.
112 ; CHECK: merge_nonconst_store
113 ; CHECK: movl $67305985
; Like @merge_const_store, except that field 4 receives the runtime argument
; %zz instead of a constant. The expected immediate 67305985 equals 0x04030201,
; i.e. the constants for fields 0..3 packed into one 32-bit value; the
; non-constant store at field 4 interrupts the run of constant stores.
119 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
120 %1 = icmp sgt i32 %count, 0
121 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; %i.02 is the iteration counter (starts at 0); %.01 is the struct currently
; being written (starts at %p).
123 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
124 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
; Fields 0..3 receive the constants 1..4.
125 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
126 store i8 1, i8* %2, align 1
127 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
128 store i8 2, i8* %3, align 1
129 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
130 store i8 3, i8* %4, align 1
131 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
132 store i8 4, i8* %5, align 1
133 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
134 store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
; Fields 5..7 receive the constants 6..8.
135 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
136 store i8 6, i8* %7, align 1
137 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
138 store i8 7, i8* %8, align 1
139 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
140 store i8 8, i8* %9, align 1
141 %10 = add nsw i32 %i.02, 1
142 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; address of the next %struct.A
143 %exitcond = icmp eq i32 %10, %count
144 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave the loop once %10 == %count
150 ;CHECK-LABEL: merge_loads_i16:
; Copies the first two i8 fields of %q into the first two i8 fields of each
; successive %struct.A starting at %p. Both pointers are noalias, and both
; loads precede both stores.
; NOTE(review): the function name suggests the two byte copies are expected to
; become one 16-bit load/store pair - confirm against the expectations for
; this function.
156 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
157 %1 = icmp sgt i32 %count, 0
158 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; Source addresses (fields 0 and 1 of %q) are computed before the loop block.
161 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
162 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
165 ; <label>:4 ; preds = %4, %.lr.ph
; %i.02 is the iteration counter (starts at 0); %.01 is the destination struct
; (starts at %p).
166 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
167 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
; Two adjacent byte loads followed by two adjacent byte stores.
168 %5 = load i8* %2, align 1
169 %6 = load i8* %3, align 1
170 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
171 store i8 %5, i8* %7, align 1
172 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
173 store i8 %6, i8* %8, align 1
174 %9 = add nsw i32 %i.02, 1
175 %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; address of the next destination struct
176 %exitcond = icmp eq i32 %9, %count
177 br i1 %exitcond, label %._crit_edge, label %4 ; leave the loop once %9 == %count
179 ._crit_edge: ; preds = %4, %0
183 ; The loads and the stores are interleaved. Can't merge them.
184 ;CHECK-LABEL: no_merge_loads:
; Same two-byte copy as @merge_loads_i16, but the order inside the loop is
; load, store, load, store: the second load (%a6) comes after the first store,
; so the loads and stores are interleaved.
190 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
191 %1 = icmp sgt i32 %count, 0
192 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; Source addresses (fields 0 and 1 of %q) are computed before the loop block.
195 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
196 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
199 a4: ; preds = %a4, %.lr.ph
; %i.02 is the iteration counter (starts at 0); %.01 is the destination struct
; (starts at %p).
200 %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
201 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
202 %a5 = load i8* %2, align 1
203 %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
204 store i8 %a5, i8* %a7, align 1 ; first store happens before the second load
205 %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
206 %a6 = load i8* %3, align 1
207 store i8 %a6, i8* %a8, align 1
208 %a9 = add nsw i32 %i.02, 1
209 %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; address of the next destination struct
210 %exitcond = icmp eq i32 %a9, %count
211 br i1 %exitcond, label %._crit_edge, label %a4 ; leave the loop once %a9 == %count
213 ._crit_edge: ; preds = %a4, %0
218 ;CHECK-LABEL: merge_loads_integer:
; Stores the two i32 values %5 and %6 into fields 0 and 1 of each successive
; %struct.B starting at %p. Both pointers are noalias.
; NOTE(review): the definitions of %5 and %6 are not among the lines shown
; here; presumably they are loads from %2 and %3 (fields 0 and 1 of %q) -
; confirm.
224 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
225 %1 = icmp sgt i32 %count, 0
226 br i1 %1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; Source addresses (fields 0 and 1 of %q) are computed before the loop block.
229 %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
230 %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
233 ; <label>:4 ; preds = %4, %.lr.ph
; %i.02 is the iteration counter (starts at 0); %.01 is the destination struct
; (starts at %p).
234 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
235 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
; The stores below carry no explicit alignment.
238 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
239 store i32 %5, i32* %7
240 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
241 store i32 %6, i32* %8
242 %9 = add nsw i32 %i.02, 1
243 %10 = getelementptr inbounds %struct.B* %.01, i64 1 ; address of the next destination struct
244 %exitcond = icmp eq i32 %9, %count
245 br i1 %exitcond, label %._crit_edge, label %4 ; leave the loop once %9 == %count
247 ._crit_edge: ; preds = %4, %0
252 ;CHECK-LABEL: merge_loads_vector:
; Stores the four i32 values %b1..%b4 into fields 0..3 of each successive
; %struct.B starting at %p. Both pointers are noalias.
; NOTE(review): the definitions of %b1..%b4 are not among the lines shown
; here; presumably they are loads from %a2..%a5 (fields 0..3 of %q) - confirm.
258 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
259 %a1 = icmp sgt i32 %count, 0
260 br i1 %a1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; Source addresses (fields 0..3 of %q) are computed before the loop block.
263 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
264 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
265 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
266 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
269 block4: ; preds = %block4, %.lr.ph
; %i.02 is the iteration counter (starts at 0); %.01 is the destination struct
; (starts at %p).
270 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
271 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
; Destination addresses: fields 0..3 of the current struct.
272 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
273 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
274 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
275 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
; Four adjacent i32 stores with no explicit alignment.
280 store i32 %b1, i32* %a7
281 store i32 %b2, i32* %a8
282 store i32 %b3, i32* %a9
283 store i32 %b4, i32* %a10
284 %c9 = add nsw i32 %i.02, 1
285 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; address of the next destination struct
286 %exitcond = icmp eq i32 %c9, %count
287 br i1 %exitcond, label %._crit_edge, label %block4 ; leave the loop once %c9 == %count
289 ._crit_edge: ; preds = %block4, %0
293 ;CHECK-LABEL: merge_loads_no_align:
; Copies fields 0..3 of %q into fields 0..3 of each successive %struct.B
; starting at %p. Every load and store is marked align 1, so none of the
; accesses promises more than byte alignment. Both pointers are noalias.
305 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
306 %a1 = icmp sgt i32 %count, 0
307 br i1 %a1, label %.lr.ph, label %._crit_edge ; go to %._crit_edge when %count <= 0
; Source addresses (fields 0..3 of %q) are computed before the loop block.
310 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
311 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
312 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
313 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
316 block4: ; preds = %block4, %.lr.ph
; %i.02 is the iteration counter (starts at 0); %.01 is the destination struct
; (starts at %p).
317 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
318 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
; Destination addresses: fields 0..3 of the current struct.
319 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
320 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
321 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
322 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
; All four loads are issued before any of the four stores.
323 %b1 = load i32* %a2, align 1
324 %b2 = load i32* %a3, align 1
325 %b3 = load i32* %a4, align 1
326 %b4 = load i32* %a5, align 1
327 store i32 %b1, i32* %a7, align 1
328 store i32 %b2, i32* %a8, align 1
329 store i32 %b3, i32* %a9, align 1
330 store i32 %b4, i32* %a10, align 1
331 %c9 = add nsw i32 %i.02, 1
332 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; address of the next destination struct
333 %exitcond = icmp eq i32 %c9, %count
334 br i1 %exitcond, label %._crit_edge, label %block4 ; leave the loop once %c9 == %count
336 ._crit_edge: ; preds = %block4, %0
340 ; Make sure that we merge the consecutive load/store sequence below and use a
341 ; word (16 bit) instead of a byte copy.
342 ; CHECK: MergeLoadStoreBaseIndexOffset
343 ; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
344 ; CHECK: movw [[REG]], (%{{.*}})
; Each iteration reads an i64 index from %a, loads two bytes from %c at the
; offsets %3 and %6, and writes them to two adjacent bytes of %b. %n is counted
; down to zero. The expectations above look for a 16-bit base+index load whose
; result register feeds a 16-bit store.
; NOTE(review): the definition of %6 is not among the lines shown here;
; presumably it is %3 + 1, which would make the two source bytes adjacent -
; confirm.
345 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
; %.09: remaining iterations (starts at %n). %.08: destination cursor (starts
; at %b). %.0: index cursor (starts at %a).
349 %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
350 %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
351 %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
352 %2 = getelementptr inbounds i64* %.0, i64 1 ; next index slot
353 %3 = load i64* %.0, align 1
354 %4 = getelementptr inbounds i8* %c, i64 %3
355 %5 = load i8* %4, align 1
357 %7 = getelementptr inbounds i8* %c, i64 %6
358 %8 = load i8* %7, align 1
; Both loaded bytes go to consecutive destination addresses.
359 store i8 %5, i8* %.08, align 1
360 %9 = getelementptr inbounds i8* %.08, i64 1
361 store i8 %8, i8* %9, align 1
362 %10 = getelementptr inbounds i8* %.08, i64 2 ; destination advances by two bytes
363 %11 = add nsw i32 %.09, -1
364 %12 = icmp eq i32 %11, 0
365 br i1 %12, label %13, label %1 ; leave the loop when the countdown reaches zero
371 ; Make sure that we merge the consecutive load/store sequence below and use a
372 ; word (16 bit) instead of a byte copy even if there are intermediate sign extensions.
374 ; CHECK: MergeLoadStoreBaseIndexOffsetSext
375 ; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
376 ; CHECK: movw [[REG]], (%{{.*}})
; Variant of @MergeLoadStoreBaseIndexOffset in which the index is an i8 read
; from %a and sign-extended to i64 (%4) before it is used to address %c. The
; expectations above still look for a 16-bit base+index load feeding a 16-bit
; store.
; NOTE(review): the definition of %7 is not among the lines shown here;
; presumably it is %4 + 1 computed in i64 - confirm.
377 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
; %.09: remaining iterations (starts at %n). %.08: destination cursor (starts
; at %b). %.0: index cursor (starts at %a).
381 %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
382 %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
383 %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
384 %2 = getelementptr inbounds i8* %.0, i64 1 ; next index byte
385 %3 = load i8* %.0, align 1
386 %4 = sext i8 %3 to i64 ; the only sign extension visible in this function
387 %5 = getelementptr inbounds i8* %c, i64 %4
388 %6 = load i8* %5, align 1
390 %8 = getelementptr inbounds i8* %c, i64 %7
391 %9 = load i8* %8, align 1
; Both loaded bytes go to consecutive destination addresses.
392 store i8 %6, i8* %.08, align 1
393 %10 = getelementptr inbounds i8* %.08, i64 1
394 store i8 %9, i8* %10, align 1
395 %11 = getelementptr inbounds i8* %.08, i64 2 ; destination advances by two bytes
396 %12 = add nsw i32 %.09, -1
397 %13 = icmp eq i32 %12, 0
398 br i1 %13, label %14, label %1 ; leave the loop when the countdown reaches zero
404 ; However, we can only ignore sign extensions while merging when they are applied to all of the memory address computations.
406 ; CHECK: loadStoreBaseIndexOffsetSextNoSex
407 ; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
408 ; CHECK-NOT: movw [[REG]], (%{{.*}})
; Negative counterpart of @MergeLoadStoreBaseIndexOffsetSext: here the second
; source address uses its own sign extension (%wrap.4 = sext of the i8 value
; %7) instead of reusing the already-extended %4. The expectations above
; require that no 16-bit load/store pair is produced.
; NOTE(review): the definition of %7 is not among the lines shown here;
; presumably it is %3 + 1 computed in i8, which may wrap before the extension
; - confirm.
409 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
; %.09: remaining iterations (starts at %n). %.08: destination cursor (starts
; at %b). %.0: index cursor (starts at %a).
413 %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
414 %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
415 %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
416 %2 = getelementptr inbounds i8* %.0, i64 1 ; next index byte
417 %3 = load i8* %.0, align 1
418 %4 = sext i8 %3 to i64 ; extension used for the first source address
419 %5 = getelementptr inbounds i8* %c, i64 %4
420 %6 = load i8* %5, align 1
422 %wrap.4 = sext i8 %7 to i64 ; separate extension used for the second source address
423 %8 = getelementptr inbounds i8* %c, i64 %wrap.4
424 %9 = load i8* %8, align 1
; Both loaded bytes go to consecutive destination addresses.
425 store i8 %6, i8* %.08, align 1
426 %10 = getelementptr inbounds i8* %.08, i64 1
427 store i8 %9, i8* %10, align 1
428 %11 = getelementptr inbounds i8* %.08, i64 2 ; destination advances by two bytes
429 %12 = add nsw i32 %.09, -1
430 %13 = icmp eq i32 %12, 0
431 br i1 %13, label %14, label %1 ; leave the loop when the countdown reaches zero