1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4 target triple = "x86_64-apple-macosx10.8.0"
6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
7 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
9 ; CHECK: merge_const_store
10 ; save 1,2,3 ... as one big integer.
11 ; CHECK: movabsq $578437695752307201
; Test input: every loop iteration writes the constant bytes 1..8 into the
; eight consecutive i8 fields of one %struct.A, then steps to the next struct.
; With the little-endian layout declared in the datalayout string, those eight
; bytes form 0x0807060504030201 == 578437695752307201, the immediate the
; FileCheck expectation for this function looks for.
13 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
14 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
15 br i1 %1, label %.lr.ph, label %._crit_edge
17 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
18 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration, starts at %p
19 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
20 store i8 1, i8* %2, align 1 ; field 0 = 1
21 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
22 store i8 2, i8* %3, align 1 ; field 1 = 2
23 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
24 store i8 3, i8* %4, align 1 ; field 2 = 3
25 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
26 store i8 4, i8* %5, align 1 ; field 3 = 4
27 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
28 store i8 5, i8* %6, align 1 ; field 4 = 5
29 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
30 store i8 6, i8* %7, align 1 ; field 5 = 6
31 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
32 store i8 7, i8* %8, align 1 ; field 6 = 7
33 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
34 store i8 8, i8* %9, align 1 ; field 7 = 8
35 %10 = add nsw i32 %i.02, 1 ; next iteration count
36 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance by one whole %struct.A
37 %exitcond = icmp eq i32 %10, %count
38 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave once count iterations have run
43 ; Move the constants using a single vector store.
44 ; CHECK: merge_const_store_vec
45 ; CHECK: vmovups %ymm0, (%rsi)
; Test input: every loop iteration stores i32 0 into each of the eight
; consecutive i32 fields of one %struct.B (32 bytes in total), then steps to
; the next struct. The FileCheck expectation for this function looks for one
; unaligned ymm store; note that each scalar store only promises align 4.
47 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
48 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
49 br i1 %1, label %.lr.ph, label %._crit_edge
51 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
52 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration, starts at %p
53 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
54 store i32 0, i32* %2, align 4 ; field 0 = 0
55 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
56 store i32 0, i32* %3, align 4 ; field 1 = 0
57 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
58 store i32 0, i32* %4, align 4 ; field 2 = 0
59 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
60 store i32 0, i32* %5, align 4 ; field 3 = 0
61 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
62 store i32 0, i32* %6, align 4 ; field 4 = 0
63 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
64 store i32 0, i32* %7, align 4 ; field 5 = 0
65 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
66 store i32 0, i32* %8, align 4 ; field 6 = 0
67 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
68 store i32 0, i32* %9, align 4 ; field 7 = 0
69 %10 = add nsw i32 %i.02, 1 ; next iteration count
70 %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance by one whole %struct.B
71 %exitcond = icmp eq i32 %10, %count
72 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave once count iterations have run
77 ; Move the first 4 constants as a single 32-bit integer store. Move the rest as scalars.
78 ; CHECK: merge_nonconst_store
79 ; CHECK: movl $67305985
; Test input: same store pattern as a run of eight adjacent byte stores, except
; that field 4 receives the non-constant argument %zz. Fields 0..3 get the
; constants 1..4 (0x04030201 == 67305985, the immediate the FileCheck
; expectation for this function looks for); fields 5..7 get 6, 7 and 8.
85 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
86 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
87 br i1 %1, label %.lr.ph, label %._crit_edge
89 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
90 %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration, starts at %p
91 %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
92 store i8 1, i8* %2, align 1 ; field 0 = 1
93 %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
94 store i8 2, i8* %3, align 1 ; field 1 = 2
95 %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
96 store i8 3, i8* %4, align 1 ; field 2 = 3
97 %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
98 store i8 4, i8* %5, align 1 ; field 3 = 4
99 %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
100 store i8 %zz, i8* %6, align 1 ; <----------- Not a const; splits the run of constant stores in two
101 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
102 store i8 6, i8* %7, align 1 ; field 5 = 6
103 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
104 store i8 7, i8* %8, align 1 ; field 6 = 7
105 %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
106 store i8 8, i8* %9, align 1 ; field 7 = 8
107 %10 = add nsw i32 %i.02, 1 ; next iteration count
108 %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance by one whole %struct.A
109 %exitcond = icmp eq i32 %10, %count
110 br i1 %exitcond, label %._crit_edge, label %.lr.ph ; leave once count iterations have run
116 ;CHECK: merge_loads_i16
; Test input: every loop iteration loads the first two i8 fields of %q and
; stores them to the first two i8 fields of the current destination struct.
; Both loads come before both stores, and %q / %p are declared noalias.
; NOTE(review): the function name suggests the two byte copies are expected to
; become one 16-bit load/store pair - confirm against the full test file.
122 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
123 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
124 br i1 %1, label %.lr.ph, label %._crit_edge
127 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source field 0, computed once before the loop
128 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source field 1, adjacent to field 0
131 ; <label>:4 ; preds = %4, %.lr.ph
132 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] ; iteration counter, starts at 0
133 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct, starts at %p
134 %5 = load i8* %2, align 1 ; both loads are issued first ...
135 %6 = load i8* %3, align 1
136 %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
137 store i8 %5, i8* %7, align 1 ; ... followed by both stores to adjacent destination fields
138 %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
139 store i8 %6, i8* %8, align 1
140 %9 = add nsw i32 %i.02, 1 ; next iteration count
141 %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance destination by one whole %struct.A
142 %exitcond = icmp eq i32 %9, %count
143 br i1 %exitcond, label %._crit_edge, label %4 ; leave once count iterations have run
145 ._crit_edge: ; preds = %4, %0
149 ; The loads and the stores are interleaved. Can't merge them.
150 ;CHECK: no_merge_loads
; Test input: copies the same two adjacent bytes from %q to the current
; destination struct, but in the order load, store, load, store. The second
; load sits between the two stores, unlike a loads-first/stores-last sequence.
156 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
157 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
158 br i1 %1, label %.lr.ph, label %._crit_edge
161 %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source field 0, computed once before the loop
162 %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source field 1, adjacent to field 0
165 a4: ; preds = %a4, %.lr.ph
166 %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] ; iteration counter, starts at 0
167 %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; destination struct, starts at %p
168 %a5 = load i8* %2, align 1 ; first load
169 %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
170 store i8 %a5, i8* %a7, align 1 ; first store happens before the second load
171 %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
172 %a6 = load i8* %3, align 1 ; second load, placed after the first store
173 store i8 %a6, i8* %a8, align 1 ; second store
174 %a9 = add nsw i32 %i.02, 1 ; next iteration count
175 %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance destination by one whole %struct.A
176 %exitcond = icmp eq i32 %a9, %count
177 br i1 %exitcond, label %._crit_edge, label %a4 ; leave once count iterations have run
179 ._crit_edge: ; preds = %a4, %0
184 ;CHECK: merge_loads_integer
; Test input: every loop iteration stores %5 and %6 to the first two i32
; fields of the current destination %struct.B. %q and %p are declared noalias.
; NOTE(review): the definitions of %5 and %6 are not among the lines shown
; here; presumably they are i32 loads from %2 and %3 - confirm against the
; full test file.
190 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
191 %1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
192 br i1 %1, label %.lr.ph, label %._crit_edge
195 %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source field 0, computed once before the loop
196 %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1 ; source field 1, adjacent to field 0
199 ; <label>:4 ; preds = %4, %.lr.ph
200 %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] ; iteration counter, starts at 0
201 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct, starts at %p
204 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
205 store i32 %5, i32* %7 ; no explicit alignment on this store
206 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
207 store i32 %6, i32* %8 ; adjacent to the previous store
208 %9 = add nsw i32 %i.02, 1 ; next iteration count
209 %10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance destination by one whole %struct.B
210 %exitcond = icmp eq i32 %9, %count
211 br i1 %exitcond, label %._crit_edge, label %4 ; leave once count iterations have run
213 ._crit_edge: ; preds = %4, %0
218 ;CHECK: merge_loads_vector
; Test input: every loop iteration stores %b1..%b4 to the first four i32
; fields (16 adjacent bytes) of the current destination %struct.B. %q and %p
; are declared noalias.
; NOTE(review): the definitions of %b1..%b4 are not among the lines shown
; here; presumably they are i32 loads from %a2..%a5 - confirm against the
; full test file.
224 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
225 %a1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
226 br i1 %a1, label %.lr.ph, label %._crit_edge
229 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source fields 0..3, computed once before the loop
230 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
231 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
232 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
235 block4: ; preds = %block4, %.lr.ph
236 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
237 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct, starts at %p
238 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; destination fields 0..3
239 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
240 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
241 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
246 store i32 %b1, i32* %a7 ; four back-to-back stores, no explicit alignment
247 store i32 %b2, i32* %a8
248 store i32 %b3, i32* %a9
249 store i32 %b4, i32* %a10
250 %c9 = add nsw i32 %i.02, 1 ; next iteration count
251 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance destination by one whole %struct.B
252 %exitcond = icmp eq i32 %c9, %count
253 br i1 %exitcond, label %._crit_edge, label %block4 ; leave once count iterations have run
255 ._crit_edge: ; preds = %block4, %0
259 ;CHECK: merge_loads_no_align
; Test input: every loop iteration loads the first four i32 fields of %q and
; stores them to the first four i32 fields of the current destination
; %struct.B. All four loads come before all four stores, and every access is
; marked align 1, so none may be assumed to be naturally aligned.
271 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
272 %a1 = icmp sgt i32 %count, 0 ; enter the loop only when count > 0
273 br i1 %a1, label %.lr.ph, label %._crit_edge
276 %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source fields 0..3, computed once before the loop
277 %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
278 %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
279 %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
282 block4: ; preds = %block4, %.lr.ph
283 %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
284 %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct, starts at %p
285 %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; destination fields 0..3
286 %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
287 %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
288 %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
289 %b1 = load i32* %a2, align 1 ; four adjacent loads, each only byte-aligned
290 %b2 = load i32* %a3, align 1
291 %b3 = load i32* %a4, align 1
292 %b4 = load i32* %a5, align 1
293 store i32 %b1, i32* %a7, align 1 ; four adjacent stores, each only byte-aligned
294 store i32 %b2, i32* %a8, align 1
295 store i32 %b3, i32* %a9, align 1
296 store i32 %b4, i32* %a10, align 1
297 %c9 = add nsw i32 %i.02, 1 ; next iteration count
298 %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance destination by one whole %struct.B
299 %exitcond = icmp eq i32 %c9, %count
300 br i1 %exitcond, label %._crit_edge, label %block4 ; leave once count iterations have run
302 ._crit_edge: ; preds = %block4, %0