1 ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
2 ; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
3 ; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
; ldp_int: loads the i32 at %p and the i32 one element after it (%p + 1),
; both with 4-byte alignment, and sums them with an nsw add.
; NOTE(review): the name suggests the two adjacent loads are expected to be
; merged into one ldp; the FileCheck directives for this function are not
; visible in this view -- confirm.
7 define i32 @ldp_int(i32* %p) nounwind {
8 %tmp = load i32* %p, align 4
9 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
10 %tmp1 = load i32* %add.ptr, align 4
11 %add = add nsw i32 %tmp1, %tmp
; ldp_sext_int: loads two adjacent, 4-byte-aligned i32 values (%p and %p + 1),
; sign-extends each to i64, and sums the extended values with an nsw add.
; NOTE(review): presumably intended to exercise a sign-extending load pair;
; the FileCheck directives for this function are not visible here -- confirm.
17 define i64 @ldp_sext_int(i32* %p) nounwind {
18 %tmp = load i32* %p, align 4
19 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
20 %tmp1 = load i32* %add.ptr, align 4
21 %sexttmp = sext i32 %tmp to i64
22 %sexttmp1 = sext i32 %tmp1 to i64
23 %add = add nsw i64 %sexttmp1, %sexttmp
; ldp_long: loads the i64 at %p and the i64 one element after it (%p + 1),
; both with 8-byte alignment, and sums them with an nsw add.
; NOTE(review): the name suggests the loads are expected to be paired into
; one ldp; the FileCheck directives are not visible in this view -- confirm.
29 define i64 @ldp_long(i64* %p) nounwind {
30 %tmp = load i64* %p, align 8
31 %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
32 %tmp1 = load i64* %add.ptr, align 8
33 %add = add nsw i64 %tmp1, %tmp
; ldp_float: loads the float at %p and the float one element after it
; (%p + 1), both with 4-byte alignment, and sums them with fadd.
; NOTE(review): the name suggests the loads are expected to be paired into
; one ldp; the FileCheck directives are not visible in this view -- confirm.
39 define float @ldp_float(float* %p) nounwind {
40 %tmp = load float* %p, align 4
41 %add.ptr = getelementptr inbounds float, float* %p, i64 1
42 %tmp1 = load float* %add.ptr, align 4
43 %add = fadd float %tmp, %tmp1
; ldp_double: loads the double at %p and the double one element after it
; (%p + 1), both with 8-byte alignment, and sums them with fadd.
; NOTE(review): the name suggests the loads are expected to be paired into
; one ldp; the FileCheck directives are not visible in this view -- confirm.
49 define double @ldp_double(double* %p) nounwind {
50 %tmp = load double* %p, align 8
51 %add.ptr = getelementptr inbounds double, double* %p, i64 1
52 %tmp1 = load double* %add.ptr, align 8
53 %add = fadd double %tmp, %tmp1
57 ; Test the load/store optimizer---combine ldurs into an ldp, if appropriate
; ldur_int: loads the i32 values at %a[-1] and %a[-2] (byte offsets -4 and -8
; from %a, each load marked align 2) and adds them.
; The LDUR_CHK directives below require a single ldp of two w registers at
; [x0, #-8], immediately followed by an add of the two loaded registers.
58 define i32 @ldur_int(i32* %a) nounwind {
60 ; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
61 ; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
63 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
64 %tmp1 = load i32* %p1, align 2
65 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
66 %tmp2 = load i32* %p2, align 2
67 %tmp3 = add i32 %tmp1, %tmp2
; ldur_sext_int: loads the i32 values at %a[-1] and %a[-2] (byte offsets -4
; and -8, each load marked align 2), sign-extends both to i64, and adds them.
; The LDUR_CHK directives below require a single ldpsw into two x registers at
; [x0, #-8], immediately followed by an add of the two loaded registers.
71 define i64 @ldur_sext_int(i32* %a) nounwind {
72 ; LDUR_CHK: ldur_sext_int
73 ; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
74 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
76 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
77 %tmp1 = load i32* %p1, align 2
78 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
79 %tmp2 = load i32* %p2, align 2
80 %sexttmp1 = sext i32 %tmp1 to i64
81 %sexttmp2 = sext i32 %tmp2 to i64
82 %tmp3 = add i64 %sexttmp1, %sexttmp2
; ldur_long: loads the i64 values at %a[-1] and %a[-2] (byte offsets -8 and
; -16 from %a, each load marked align 2) and adds them.
; The LDUR_CHK directives below require a single ldp of two x registers at
; [x0, #-16], immediately followed by an add of the two loaded registers.
86 define i64 @ldur_long(i64* %a) nounwind ssp {
88 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
89 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
91 %p1 = getelementptr inbounds i64, i64* %a, i64 -1
92 %tmp1 = load i64* %p1, align 2
93 %p2 = getelementptr inbounds i64, i64* %a, i64 -2
94 %tmp2 = load i64* %p2, align 2
95 %tmp3 = add i64 %tmp1, %tmp2
; ldur_float: loads the floats at %a[-1] and %a[-2] (byte offsets -4 and -8
; from %a, each load marked align 2) and sums them with fadd.
; The LDUR_CHK directives below require a single ldp of two s registers at
; [x0, #-8], immediately followed by the floating-point add of the pair.
; The -NEXT directive spells out the full fadd mnemonic: a bare "add s..."
; pattern only matched because FileCheck accepts it as a substring of "fadd".
99 define float @ldur_float(float* %a) {
100 ; LDUR_CHK: ldur_float
101 ; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
102 ; LDUR_CHK-NEXT: fadd s{{[0-9]+}}, [[DST2]], [[DST1]]
104 %p1 = getelementptr inbounds float, float* %a, i64 -1
105 %tmp1 = load float* %p1, align 2
106 %p2 = getelementptr inbounds float, float* %a, i64 -2
107 %tmp2 = load float* %p2, align 2
108 %tmp3 = fadd float %tmp1, %tmp2
; ldur_double: loads the doubles at %a[-1] and %a[-2] (byte offsets -8 and
; -16 from %a, each load marked align 2) and sums them with fadd.
; The LDUR_CHK directives below require a single ldp of two d registers at
; [x0, #-16], immediately followed by the floating-point add of the pair.
; The -NEXT directive spells out the full fadd mnemonic: a bare "add d..."
; pattern only matched because FileCheck accepts it as a substring of "fadd".
112 define double @ldur_double(double* %a) {
113 ; LDUR_CHK: ldur_double
114 ; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
115 ; LDUR_CHK-NEXT: fadd d{{[0-9]+}}, [[DST2]], [[DST1]]
117 %p1 = getelementptr inbounds double, double* %a, i64 -1
118 %tmp1 = load double* %p1, align 2
119 %p2 = getelementptr inbounds double, double* %a, i64 -2
120 %tmp2 = load double* %p2, align 2
121 %tmp3 = fadd double %tmp1, %tmp2
125 ; Now check some boundary conditions
; pairUpBarelyIn: loads the i64 values at %a[-31] and %a[-32] (byte offsets
; -248 and -256 from %a, each load marked align 2) and adds them.
; The LDUR_CHK directives below require the loads to be paired into one ldp at
; [x0, #-256], immediately followed by an add of the two loaded registers.
; NOTE(review): -256 is presumably the lowest offset at which pairing is still
; expected (compare pairUpBarelyOut) -- confirm against the load/store
; optimizer's offset limits.
126 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
127 ; LDUR_CHK: pairUpBarelyIn
129 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
130 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
132 %p1 = getelementptr inbounds i64, i64* %a, i64 -31
133 %tmp1 = load i64* %p1, align 2
134 %p2 = getelementptr inbounds i64, i64* %a, i64 -32
135 %tmp2 = load i64* %p2, align 2
136 %tmp3 = add i64 %tmp1, %tmp2
; pairUpBarelyInSext: loads the i32 values at %a[-63] and %a[-64] (byte
; offsets -252 and -256 from %a, each load marked align 2), sign-extends both
; to i64, and adds them.
; The LDUR_CHK directives below require the loads to be paired into one ldpsw
; at [x0, #-256], immediately followed by an add of the two loaded registers.
; NOTE(review): -256 is presumably the lowest offset at which pairing is still
; expected (compare pairUpBarelyOutSext) -- confirm.
140 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
141 ; LDUR_CHK: pairUpBarelyInSext
143 ; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
144 ; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
146 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
147 %tmp1 = load i32* %p1, align 2
148 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
149 %tmp2 = load i32* %p2, align 2
150 %sexttmp1 = sext i32 %tmp1 to i64
151 %sexttmp2 = sext i32 %tmp2 to i64
152 %tmp3 = add i64 %sexttmp1, %sexttmp2
; pairUpBarelyOut: loads the i64 values at %a[-32] and %a[-33] (byte offsets
; -256 and -264 from %a, each load marked align 2) and adds them.
; Per the comment inside the function, the intent is that no ldp appears
; before the add.
; NOTE(review): the directives that enforce this are not visible in this
; view -- confirm they are present in the full file.
156 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
157 ; LDUR_CHK: pairUpBarelyOut
159 ; Don't be fragile about which loads or manipulations of the base register
160 ; are used---just check that there isn't an ldp before the add
163 %p1 = getelementptr inbounds i64, i64* %a, i64 -32
164 %tmp1 = load i64* %p1, align 2
165 %p2 = getelementptr inbounds i64, i64* %a, i64 -33
166 %tmp2 = load i64* %p2, align 2
167 %tmp3 = add i64 %tmp1, %tmp2
; pairUpBarelyOutSext: loads the i32 values at %a[-64] and %a[-65] (byte
; offsets -256 and -260 from %a, each load marked align 2), sign-extends both
; to i64, and adds them.
; Per the comment inside the function, the intent is that no ldp appears
; before the add.
; NOTE(review): the directives that enforce this are not visible in this
; view -- confirm they are present in the full file.
171 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
172 ; LDUR_CHK: pairUpBarelyOutSext
174 ; Don't be fragile about which loads or manipulations of the base register
175 ; are used---just check that there isn't an ldp before the add
178 %p1 = getelementptr inbounds i32, i32* %a, i64 -64
179 %tmp1 = load i32* %p1, align 2
180 %p2 = getelementptr inbounds i32, i32* %a, i64 -65
181 %tmp2 = load i32* %p2, align 2
182 %sexttmp1 = sext i32 %tmp1 to i64
183 %sexttmp2 = sext i32 %tmp2 to i64
184 %tmp3 = add i64 %sexttmp1, %sexttmp2
; pairUpNotAligned: loads two i64 values from addresses that are one byte past
; %a[-18] and %a[-17] (byte offsets -143 and -135 from %a, loads marked
; align 1) and adds them. Each address is formed by casting the element
; pointer to i8*, stepping one byte, and casting back to i64*.
; The visible LDUR_CHK-NEXT directive expects an ldur.
; NOTE(review): presumably the point is that these offsets are not a multiple
; of the access size, so the loads must stay as separate ldur instructions;
; the surrounding directives are not visible in this view -- confirm.
188 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
189 ; LDUR_CHK: pairUpNotAligned
192 ; LDUR_CHK-NEXT: ldur
195 %p1 = getelementptr inbounds i64, i64* %a, i64 -18
196 %bp1 = bitcast i64* %p1 to i8*
197 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
198 %dp1 = bitcast i8* %bp1p1 to i64*
199 %tmp1 = load i64* %dp1, align 1
201 %p2 = getelementptr inbounds i64, i64* %a, i64 -17
202 %bp2 = bitcast i64* %p2 to i8*
203 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
204 %dp2 = bitcast i8* %bp2p1 to i64*
205 %tmp2 = load i64* %dp2, align 1
207 %tmp3 = add i64 %tmp1, %tmp2
211 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
212 ; LDUR_CHK: pairUpNotAlignedSext
215 ; LDUR_CHK-NEXT: ldursw
218 %p1 = getelementptr inbounds i32, i32* %a, i64 -18
219 %bp1 = bitcast i32* %p1 to i8*
220 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
221 %dp1 = bitcast i8* %bp1p1 to i32*
222 %tmp1 = load i32* %dp1, align 1
224 %p2 = getelementptr inbounds i32, i32* %a, i64 -17
225 %bp2 = bitcast i32* %p2 to i8*
226 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
227 %dp2 = bitcast i8* %bp2p1 to i32*
228 %tmp2 = load i32* %dp2, align 1
230 %sexttmp1 = sext i32 %tmp1 to i64
231 %sexttmp2 = sext i32 %tmp2 to i64
232 %tmp3 = add i64 %sexttmp1, %sexttmp2