1 ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
2 ; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
3 ; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
; Two i32 loads from consecutive addresses (p, p+1) feeding an add:
; the load/store optimizer should merge them into a single ldp.
7 define i32 @ldp_int(i32* %p) nounwind {
8 %tmp = load i32, i32* %p, align 4
9 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
10 %tmp1 = load i32, i32* %add.ptr, align 4
11 %add = add nsw i32 %tmp1, %tmp
; Both adjacent i32 loads are sign-extended to i64 before the add,
; so the pair can fold the extension into the load (ldpsw form, as
; checked for the same pattern in @ldur_sext_int below).
17 define i64 @ldp_sext_int(i32* %p) nounwind {
18 %tmp = load i32, i32* %p, align 4
19 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
20 %tmp1 = load i32, i32* %add.ptr, align 4
21 %sexttmp = sext i32 %tmp to i64
22 %sexttmp1 = sext i32 %tmp1 to i64
23 %add = add nsw i64 %sexttmp1, %sexttmp
; Mixed extensions: only the first result is sign-extended (the second is
; zext), so a plain 32-bit ldp is expected plus a single sxtw on the
; sign-extended half only.
27 ; CHECK-LABEL: ldp_half_sext_res0_int:
28 ; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
29 ; CHECK: sxtw x[[DST1]], w[[DST1]]
30 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
31 %tmp = load i32, i32* %p, align 4
32 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
33 %tmp1 = load i32, i32* %add.ptr, align 4
34 %sexttmp = sext i32 %tmp to i64
35 %sexttmp1 = zext i32 %tmp1 to i64
36 %add = add nsw i64 %sexttmp1, %sexttmp
; Mirror of the previous test: only the SECOND result is sign-extended,
; so the sxtw must target DST2 instead of DST1.
40 ; CHECK-LABEL: ldp_half_sext_res1_int:
41 ; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
42 ; CHECK: sxtw x[[DST2]], w[[DST2]]
43 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
44 %tmp = load i32, i32* %p, align 4
45 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
46 %tmp1 = load i32, i32* %add.ptr, align 4
47 %sexttmp = zext i32 %tmp to i64
48 %sexttmp1 = sext i32 %tmp1 to i64
49 %add = add nsw i64 %sexttmp1, %sexttmp
; Same pairing pattern with 64-bit elements (p, p+1 as i64*).
56 define i64 @ldp_long(i64* %p) nounwind {
57 %tmp = load i64, i64* %p, align 8
58 %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
59 %tmp1 = load i64, i64* %add.ptr, align 8
60 %add = add nsw i64 %tmp1, %tmp
; Same pairing pattern with single-precision FP elements.
66 define float @ldp_float(float* %p) nounwind {
67 %tmp = load float, float* %p, align 4
68 %add.ptr = getelementptr inbounds float, float* %p, i64 1
69 %tmp1 = load float, float* %add.ptr, align 4
70 %add = fadd float %tmp, %tmp1
; Same pairing pattern with double-precision FP elements.
76 define double @ldp_double(double* %p) nounwind {
77 %tmp = load double, double* %p, align 8
78 %add.ptr = getelementptr inbounds double, double* %p, i64 1
79 %tmp1 = load double, double* %add.ptr, align 8
80 %add = fadd double %tmp, %tmp1
84 ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
; Two i32 loads at negative offsets (a-2, a-1) that would individually be
; unscaled ldur accesses; they should combine into one ldp at [x0, #-8].
85 define i32 @ldur_int(i32* %a) nounwind {
87 ; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
88 ; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
90 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
91 %tmp1 = load i32, i32* %p1, align 2
92 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
93 %tmp2 = load i32, i32* %p2, align 2
94 %tmp3 = add i32 %tmp1, %tmp2
; As @ldur_int, but both results are sign-extended to i64, so the combined
; pair should be ldpsw (sign-extending pair load).
98 define i64 @ldur_sext_int(i32* %a) nounwind {
99 ; LDUR_CHK: ldur_sext_int
100 ; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
101 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
103 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
104 %tmp1 = load i32, i32* %p1, align 2
105 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
106 %tmp2 = load i32, i32* %p2, align 2
107 %sexttmp1 = sext i32 %tmp1 to i64
108 %sexttmp2 = sext i32 %tmp2 to i64
109 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Negative-offset variant with mixed extensions: only the lower result is
; sign-extended, so expect a 32-bit ldp plus a single sxtw on DST1.
113 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
114 ; LDUR_CHK: ldur_half_sext_int_res0
115 ; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
116 ; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
117 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
119 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
120 %tmp1 = load i32, i32* %p1, align 2
121 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
122 %tmp2 = load i32, i32* %p2, align 2
123 %sexttmp1 = zext i32 %tmp1 to i64
124 %sexttmp2 = sext i32 %tmp2 to i64
125 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Mirror of the previous test: only the upper result is sign-extended,
; so the sxtw must target DST2.
129 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
130 ; LDUR_CHK: ldur_half_sext_int_res1
131 ; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
132 ; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
133 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
135 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
136 %tmp1 = load i32, i32* %p1, align 2
137 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
138 %tmp2 = load i32, i32* %p2, align 2
139 %sexttmp1 = sext i32 %tmp1 to i64
140 %sexttmp2 = zext i32 %tmp2 to i64
141 %tmp3 = add i64 %sexttmp1, %sexttmp2
; 64-bit element version of the negative-offset combine: loads at a-2, a-1
; should become one ldp at [x0, #-16].
146 define i64 @ldur_long(i64* %a) nounwind ssp {
147 ; LDUR_CHK: ldur_long
148 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
149 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
151 %p1 = getelementptr inbounds i64, i64* %a, i64 -1
152 %tmp1 = load i64, i64* %p1, align 2
153 %p2 = getelementptr inbounds i64, i64* %a, i64 -2
154 %tmp2 = load i64, i64* %p2, align 2
155 %tmp3 = add i64 %tmp1, %tmp2
; Single-precision FP version of the negative-offset combine.
159 define float @ldur_float(float* %a) {
160 ; LDUR_CHK: ldur_float
161 ; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
162 ; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
164 %p1 = getelementptr inbounds float, float* %a, i64 -1
165 %tmp1 = load float, float* %p1, align 2
166 %p2 = getelementptr inbounds float, float* %a, i64 -2
167 %tmp2 = load float, float* %p2, align 2
168 %tmp3 = fadd float %tmp1, %tmp2
; Double-precision FP version of the negative-offset combine.
172 define double @ldur_double(double* %a) {
173 ; LDUR_CHK: ldur_double
174 ; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
175 ; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
177 %p1 = getelementptr inbounds double, double* %a, i64 -1
178 %tmp1 = load double, double* %p1, align 2
179 %p2 = getelementptr inbounds double, double* %a, i64 -2
180 %tmp2 = load double, double* %p2, align 2
181 %tmp3 = fadd double %tmp1, %tmp2
185 ; Now check some boundary conditions
; Base offset -256 (a-32 in i64 units): per the CHECK this is still within
; range to form an ldp, so pairing must happen.
186 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
187 ; LDUR_CHK: pairUpBarelyIn
189 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
190 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
192 %p1 = getelementptr inbounds i64, i64* %a, i64 -31
193 %tmp1 = load i64, i64* %p1, align 2
194 %p2 = getelementptr inbounds i64, i64* %a, i64 -32
195 %tmp2 = load i64, i64* %p2, align 2
196 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extending boundary case: i32 loads at a-64, a-63 (byte offset -256)
; with both results sext'd to i64; per the CHECK an ldpsw at #-256 is still
; expected to form.
200 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
201 ; LDUR_CHK: pairUpBarelyInSext
203 ; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
204 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
206 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
207 %tmp1 = load i32, i32* %p1, align 2
208 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
209 %tmp2 = load i32, i32* %p2, align 2
210 %sexttmp1 = sext i32 %tmp1 to i64
211 %sexttmp2 = sext i32 %tmp2 to i64
212 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Boundary offset (-256) with mixed extensions: expect a 32-bit ldp plus a
; single sxtw on the sign-extended half (DST1).
216 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
217 ; LDUR_CHK: pairUpBarelyInHalfSextRes0
219 ; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
220 ; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
221 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
223 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
224 %tmp1 = load i32, i32* %p1, align 2
225 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
226 %tmp2 = load i32, i32* %p2, align 2
227 %sexttmp1 = zext i32 %tmp1 to i64
228 %sexttmp2 = sext i32 %tmp2 to i64
229 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Mirror of the previous boundary test: the sign-extended half is the
; second result, so the sxtw must target DST2.
233 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
234 ; LDUR_CHK: pairUpBarelyInHalfSextRes1
236 ; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
237 ; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
238 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
240 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
241 %tmp1 = load i32, i32* %p1, align 2
242 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
243 %tmp2 = load i32, i32* %p2, align 2
244 %sexttmp1 = sext i32 %tmp1 to i64
245 %sexttmp2 = zext i32 %tmp2 to i64
246 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Negative boundary case one element further out (a-33, a-32): pairing must
; NOT happen; the check only requires no ldp before the add (see comment).
250 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
251 ; LDUR_CHK: pairUpBarelyOut
253 ; Don't be fragile about which loads or manipulations of the base register
254 ; are used---just check that there isn't an ldp before the add
257 %p1 = getelementptr inbounds i64, i64* %a, i64 -32
258 %tmp1 = load i64, i64* %p1, align 2
259 %p2 = getelementptr inbounds i64, i64* %a, i64 -33
260 %tmp2 = load i64, i64* %p2, align 2
261 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extending out-of-range case (a-65, a-64): an ldpsw pair must NOT
; form; as above, only the absence of a pair load before the add matters.
265 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
266 ; LDUR_CHK: pairUpBarelyOutSext
268 ; Don't be fragile about which loads or manipulations of the base register
269 ; are used---just check that there isn't an ldp before the add
272 %p1 = getelementptr inbounds i32, i32* %a, i64 -64
273 %tmp1 = load i32, i32* %p1, align 2
274 %p2 = getelementptr inbounds i32, i32* %a, i64 -65
275 %tmp2 = load i32, i32* %p2, align 2
276 %sexttmp1 = sext i32 %tmp1 to i64
277 %sexttmp2 = sext i32 %tmp2 to i64
278 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Misaligned case: each load address is an 8-byte slot offset by +1 byte
; (align 1), so the accesses cannot be paired; per the CHECK, unscaled
; ldur instructions must remain.
282 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
283 ; LDUR_CHK: pairUpNotAligned
286 ; LDUR_CHK-NEXT: ldur
289 %p1 = getelementptr inbounds i64, i64* %a, i64 -18
290 %bp1 = bitcast i64* %p1 to i8*
291 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
292 %dp1 = bitcast i8* %bp1p1 to i64*
293 %tmp1 = load i64, i64* %dp1, align 1
295 %p2 = getelementptr inbounds i64, i64* %a, i64 -17
296 %bp2 = bitcast i64* %p2 to i8*
297 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
298 %dp2 = bitcast i8* %bp2p1 to i64*
299 %tmp2 = load i64, i64* %dp2, align 1
301 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extending misaligned case: i32 slots offset by +1 byte (align 1)
; must not pair; per the CHECK, a sign-extending unscaled load (ldursw)
; must remain.
305 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
306 ; LDUR_CHK: pairUpNotAlignedSext
309 ; LDUR_CHK-NEXT: ldursw
312 %p1 = getelementptr inbounds i32, i32* %a, i64 -18
313 %bp1 = bitcast i32* %p1 to i8*
314 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
315 %dp1 = bitcast i8* %bp1p1 to i32*
316 %tmp1 = load i32, i32* %dp1, align 1
318 %p2 = getelementptr inbounds i32, i32* %a, i64 -17
319 %bp2 = bitcast i32* %p2 to i8*
320 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
321 %dp2 = bitcast i8* %bp2p1 to i32*
322 %tmp2 = load i32, i32* %dp2, align 1
324 %sexttmp1 = sext i32 %tmp1 to i64
325 %sexttmp2 = sext i32 %tmp2 to i64
326 %tmp3 = add i64 %sexttmp1, %sexttmp2