1 ; RUN: opt < %s -sroa -S | FileCheck %s
2 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
16 store float 0.0, float* %a2
17 %v2 = load float * %a2
18 %v2.int = bitcast float %v2 to i32
19 %sum1 = add i32 %v1, %v2.int
30 %X = alloca { i32, float }
31 %Y = getelementptr { i32, float }* %X, i64 0, i32 0
37 define i64 @test2(i64 %X) {
44 %B = bitcast [8 x i8]* %A to i64*
53 define void @test3(i8* %dst, i8* %src) {
57 %a = alloca [300 x i8]
59 ; CHECK: %[[test3_a1:.*]] = alloca [42 x i8]
60 ; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
61 ; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
62 ; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
63 ; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
64 ; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
65 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
67 %b = getelementptr [300 x i8]* %a, i64 0, i64 0
68 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
69 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
70 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
71 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
72 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
73 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
74 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
75 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
76 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 142
77 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
78 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
79 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 158
80 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
81 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
82 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 200
83 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
84 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
85 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 207
86 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
87 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 208
88 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
89 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
90 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 215
91 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
92 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
94 ; Clobber a single element of the array, this should be promotable.
95 %c = getelementptr [300 x i8]* %a, i64 0, i64 42
98 ; Make a sequence of overlapping stores to the array. These overlap both in
99 ; forward strides and in shrinking accesses.
100 %overlap.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 142
101 %overlap.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 143
102 %overlap.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 144
103 %overlap.4.i8 = getelementptr [300 x i8]* %a, i64 0, i64 145
104 %overlap.5.i8 = getelementptr [300 x i8]* %a, i64 0, i64 146
105 %overlap.6.i8 = getelementptr [300 x i8]* %a, i64 0, i64 147
106 %overlap.7.i8 = getelementptr [300 x i8]* %a, i64 0, i64 148
107 %overlap.8.i8 = getelementptr [300 x i8]* %a, i64 0, i64 149
108 %overlap.9.i8 = getelementptr [300 x i8]* %a, i64 0, i64 150
109 %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
110 %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
111 %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
112 %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
113 %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
114 %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
115 %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
116 %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
117 %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
118 %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
119 %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
120 store i8 1, i8* %overlap.1.i8
121 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
122 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
123 store i16 1, i16* %overlap.1.i16
124 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
125 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
126 store i32 1, i32* %overlap.1.i32
127 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
128 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
129 store i64 1, i64* %overlap.1.i64
130 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
131 ; CHECK-NEXT: store i64 1, i64* %[[bitcast]]
132 store i64 2, i64* %overlap.2.i64
133 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 1
134 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
135 ; CHECK-NEXT: store i64 2, i64* %[[bitcast]]
136 store i64 3, i64* %overlap.3.i64
137 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 2
138 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
139 ; CHECK-NEXT: store i64 3, i64* %[[bitcast]]
140 store i64 4, i64* %overlap.4.i64
141 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 3
142 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
143 ; CHECK-NEXT: store i64 4, i64* %[[bitcast]]
144 store i64 5, i64* %overlap.5.i64
145 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 4
146 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
147 ; CHECK-NEXT: store i64 5, i64* %[[bitcast]]
148 store i64 6, i64* %overlap.6.i64
149 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 5
150 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
151 ; CHECK-NEXT: store i64 6, i64* %[[bitcast]]
152 store i64 7, i64* %overlap.7.i64
153 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 6
154 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
155 ; CHECK-NEXT: store i64 7, i64* %[[bitcast]]
156 store i64 8, i64* %overlap.8.i64
157 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 7
158 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
159 ; CHECK-NEXT: store i64 8, i64* %[[bitcast]]
160 store i64 9, i64* %overlap.9.i64
161 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 8
162 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
163 ; CHECK-NEXT: store i64 9, i64* %[[bitcast]]
165 ; Make two sequences of overlapping stores with more gaps and irregularities.
166 %overlap2.1.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 200
167 %overlap2.1.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 201
168 %overlap2.1.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 202
169 %overlap2.1.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 203
171 %overlap2.2.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 208
172 %overlap2.2.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 209
173 %overlap2.2.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 210
174 %overlap2.2.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 211
176 %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
177 %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
178 %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
179 %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
180 %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
181 store i8 1, i8* %overlap2.1.0.i8
182 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
183 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
184 store i16 1, i16* %overlap2.1.0.i16
185 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
186 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
187 store i32 1, i32* %overlap2.1.0.i32
188 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
189 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
190 store i32 2, i32* %overlap2.1.1.i32
191 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 1
192 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
193 ; CHECK-NEXT: store i32 2, i32* %[[bitcast]]
194 store i32 3, i32* %overlap2.1.2.i32
195 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
196 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
197 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
198 store i32 4, i32* %overlap2.1.3.i32
199 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 3
200 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
201 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
203 %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
204 %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
205 %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
206 %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
207 %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
208 store i32 1, i32* %overlap2.2.0.i32
209 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
210 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
211 store i8 1, i8* %overlap2.2.1.i8
212 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
213 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
214 store i16 1, i16* %overlap2.2.1.i16
215 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
216 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
217 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
218 store i32 1, i32* %overlap2.2.1.i32
219 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
220 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
221 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
222 store i32 3, i32* %overlap2.2.2.i32
223 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
224 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
225 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
226 store i32 4, i32* %overlap2.2.3.i32
227 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 3
228 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
229 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
231 %overlap2.prefix = getelementptr i8* %overlap2.1.1.i8, i64 -4
232 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
233 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 39
234 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
235 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 3
236 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
237 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
239 ; Bridge between the overlapping areas
240 call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
241 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
242 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
243 ; ...promoted i8 store...
244 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
245 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
247 ; Entirely within the second overlap.
248 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
249 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
250 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
252 ; Trailing past the second overlap.
253 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
254 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
255 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
256 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 5
257 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
258 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
260 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
261 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
262 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
263 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
264 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
265 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
266 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
267 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
268 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 142
269 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
270 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
271 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 158
272 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
273 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
274 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 200
275 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
276 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
277 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 207
278 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
279 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 208
280 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
281 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
282 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 215
283 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
284 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
289 define void @test4(i8* %dst, i8* %src) {
293 %a = alloca [100 x i8]
295 ; CHECK: %[[test4_a1:.*]] = alloca [20 x i8]
296 ; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
297 ; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
298 ; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
299 ; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
300 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
302 %b = getelementptr [100 x i8]* %a, i64 0, i64 0
303 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
304 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
305 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
306 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 20
307 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
308 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
309 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 22
310 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
311 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 23
312 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
313 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
314 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 30
315 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
316 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
317 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 40
318 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
319 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
320 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
321 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
322 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
323 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
324 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
325 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 50
326 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
327 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
328 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 52
329 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
330 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 53
331 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
332 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
333 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 60
334 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
335 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
337 %a.src.1 = getelementptr [100 x i8]* %a, i64 0, i64 20
338 %a.dst.1 = getelementptr [100 x i8]* %a, i64 0, i64 40
339 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
340 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
341 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
342 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
344 ; Clobber a single element of the array, this should be promotable, and be deleted.
345 %c = getelementptr [100 x i8]* %a, i64 0, i64 42
348 %a.src.2 = getelementptr [100 x i8]* %a, i64 0, i64 50
349 call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
350 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
351 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
352 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
354 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
355 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
356 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
357 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 20
358 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
359 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
360 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 22
361 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
362 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 23
363 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
364 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
365 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 30
366 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
367 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
368 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 40
369 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
370 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
371 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
372 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
373 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
374 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
375 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
376 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 50
377 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
378 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
379 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 52
380 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
381 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 53
382 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
383 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
384 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 60
385 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
386 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
; Declarations of the memory intrinsics exercised by the tests above. Each
; takes an explicit i32 alignment argument followed by an i1 isvolatile flag
; (this is the old five-argument intrinsic signature used by this test).
391 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
392 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
393 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
395 define i16 @test5() {
397 ; CHECK: alloca float
402 %fptr = bitcast [4 x i8]* %a to float*
403 store float 0.0, float* %fptr
404 %ptr = getelementptr [4 x i8]* %a, i32 0, i32 2
405 %iptr = bitcast i8* %ptr to i16*
406 %val = load i16* %iptr
410 define i32 @test6() {
413 ; CHECK-NEXT: store volatile i32
414 ; CHECK-NEXT: load i32*
415 ; CHECK-NEXT: ret i32
419 %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
420 call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
421 %iptr = bitcast i8* %ptr to i32*
422 %val = load i32* %iptr
426 define void @test7(i8* %src, i8* %dst) {
429 ; CHECK-NEXT: bitcast i8* %src to i32*
430 ; CHECK-NEXT: load volatile i32*
431 ; CHECK-NEXT: store volatile i32
432 ; CHECK-NEXT: bitcast i8* %dst to i32*
433 ; CHECK-NEXT: load volatile i32*
434 ; CHECK-NEXT: store volatile i32
439 %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
440 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
441 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
; Struct types used by @test8 and @test10: %S2 is self-referential, holding a
; pointer to an %S1 and a pointer to another %S2.
446 %S1 = type { i32, i32, [16 x i8] }
447 %S2 = type { %S1*, %S2* }
449 define %S2 @test8(%S2* %s2) {
455 %s2.next.ptr = getelementptr %S2* %s2, i64 0, i32 1
456 %s2.next = load %S2** %s2.next.ptr
457 ; CHECK: %[[gep:.*]] = getelementptr %S2* %s2, i64 0, i32 1
458 ; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
460 %s2.next.s1.ptr = getelementptr %S2* %s2.next, i64 0, i32 0
461 %s2.next.s1 = load %S1** %s2.next.s1.ptr
462 %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0
463 store %S1* %s2.next.s1, %S1** %new.s1.ptr
464 %s2.next.next.ptr = getelementptr %S2* %s2.next, i64 0, i32 1
465 %s2.next.next = load %S2** %s2.next.next.ptr
466 %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1
467 store %S2* %s2.next.next, %S2** %new.next.ptr
468 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 0
469 ; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
470 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 1
471 ; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
473 %new.s1 = load %S1** %new.s1.ptr
474 %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
475 ; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
476 %new.next = load %S2** %new.next.ptr
477 %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
478 ; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
480 ; CHECK-NEXT: ret %S2 %[[result2]]
483 define i64 @test9() {
484 ; Ensure we can handle loads off the end of an alloca even when wrapped in
485 ; weird bit casts and types. The result is undef, but this shouldn't crash
489 ; CHECK: ret i64 undef
492 %a = alloca { [3 x i8] }
493 %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
494 store i8 0, i8* %gep1, align 1
495 %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
496 store i8 0, i8* %gep2, align 1
497 %gep3 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 2
498 store i8 26, i8* %gep3, align 1
499 %cast = bitcast { [3 x i8] }* %a to { i64 }*
500 %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
501 %result = load i64* %elt
505 define %S2* @test10() {
507 ; CHECK-NOT: alloca %S2*
508 ; CHECK: ret %S2* null
512 %ptr = getelementptr [8 x i8]* %a, i32 0, i32 0
513 call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
514 %s2ptrptr = bitcast i8* %ptr to %S2**
515 %s2ptr = load %S2** %s2ptrptr
519 define i32 @test11() {
526 br i1 undef, label %good, label %bad
529 %Y = getelementptr i32* %X, i64 0
535 %Y2 = getelementptr i32* %X, i64 1
536 store i32 0, i32* %Y2
541 define i32 @test12() {
545 ; FIXME: SROA should promote accesses to this into whole i24 operations instead
555 %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
556 store i8 0, i8* %b0ptr
557 %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
558 store i8 0, i8* %b1ptr
559 %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
560 store i8 0, i8* %b2ptr
561 %iptr = bitcast [3 x i8]* %a to i24*
563 %ret = zext i24 %i to i32
567 define i32 @test13() {
568 ; Ensure we don't crash and handle undefined loads that straddle the end of the
571 ; CHECK: %[[ret:.*]] = zext i16 undef to i32
572 ; CHECK: ret i32 %[[ret]]
576 %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
577 store i8 0, i8* %b0ptr
578 %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
579 store i8 0, i8* %b1ptr
580 %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
581 store i8 0, i8* %b2ptr
582 %iptrcast = bitcast [3 x i8]* %a to i16*
583 %iptrgep = getelementptr i16* %iptrcast, i64 1
584 %i = load i16* %iptrgep
585 %ret = zext i16 %i to i32
589 %test14.struct = type { [3 x i32] }
591 define void @test14(...) nounwind uwtable {
592 ; This is a strange case where we split allocas into promotable partitions, but
593 ; also gain enough data to prove they must be dead allocas due to GEPs that walk
594 ; across two adjacent allocas. Test that we don't try to promote or otherwise
595 ; do bad things to these dead allocas, they should just be removed.
598 ; CHECK-NEXT: ret void
601 %a = alloca %test14.struct
602 %p = alloca %test14.struct*
603 %0 = bitcast %test14.struct* %a to i8*
604 %1 = getelementptr i8* %0, i64 12
605 %2 = bitcast i8* %1 to %test14.struct*
606 %3 = getelementptr inbounds %test14.struct* %2, i32 0, i32 0
607 %4 = getelementptr inbounds %test14.struct* %a, i32 0, i32 0
608 %5 = bitcast [3 x i32]* %3 to i32*
609 %6 = bitcast [3 x i32]* %4 to i32*
610 %7 = load i32* %6, align 4
611 store i32 %7, i32* %5, align 4
612 %8 = getelementptr inbounds i32* %5, i32 1
613 %9 = getelementptr inbounds i32* %6, i32 1
614 %10 = load i32* %9, align 4
615 store i32 %10, i32* %8, align 4
616 %11 = getelementptr inbounds i32* %5, i32 2
617 %12 = getelementptr inbounds i32* %6, i32 2
618 %13 = load i32* %12, align 4
619 store i32 %13, i32* %11, align 4
623 define i32 @test15(i1 %flag) nounwind uwtable {
624 ; Ensure that when there are dead instructions using an alloca that are not
625 ; loads or stores we still delete them during partitioning and rewriting.
626 ; Otherwise we'll go to promote them while they still have unpromotable uses.
629 ; CHECK-NEXT: br label %loop
631 ; CHECK-NEXT: br label %loop
641 %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
643 store i64 1879048192, i64* %l0, align 8
644 %bc0 = bitcast i64* %l0 to i8*
645 %gep0 = getelementptr i8* %bc0, i64 3
646 %dead0 = bitcast i8* %gep0 to i64*
648 store i64 1879048192, i64* %l1, align 8
649 %bc1 = bitcast i64* %l1 to i8*
650 %gep1 = getelementptr i8* %bc1, i64 3
651 %dead1 = getelementptr i8* %gep1, i64 1
653 store i64 1879048192, i64* %l2, align 8
654 %bc2 = bitcast i64* %l2 to i8*
655 %gep2.1 = getelementptr i8* %bc2, i64 1
656 %gep2.2 = getelementptr i8* %bc2, i64 3
657 ; Note that this select should get visited multiple times due to using two
658 ; different GEPs off the same alloca. We should only delete it once.
659 %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
661 store i64 1879048192, i64* %l3, align 8
662 %bc3 = bitcast i64* %l3 to i8*
663 %gep3 = getelementptr i8* %bc3, i64 3
668 define void @test16(i8* %src, i8* %dst) {
669 ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
672 ; CHECK: %[[srccast:.*]] = bitcast i8* %src to i24*
673 ; CHECK-NEXT: load i24* %[[srccast]]
674 ; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
675 ; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
676 ; CHECK-NEXT: ret void
680 %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
681 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
682 %cast = bitcast i8* %ptr to i24*
683 store i24 0, i24* %cast
684 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
688 define void @test17(i8* %src, i8* %dst) {
689 ; Ensure that we can rewrite unpromotable memcpys which extend past the end of
692 ; CHECK: %[[a:.*]] = alloca [3 x i8]
693 ; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
694 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
695 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]],
696 ; CHECK-NEXT: ret void
700 %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
701 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
702 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
706 define void @test18(i8* %src, i8* %dst, i32 %size) {
707 ; Preserve transfer intrinsics with a variable size, even if they overlap with
708 ; fixed size operations. Further, continue to split and promote allocas preceding
709 ; the variable sized intrinsic.
711 ; CHECK: %[[a:.*]] = alloca [34 x i8]
712 ; CHECK: %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
713 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
714 ; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
715 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
716 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
717 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
718 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
719 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
720 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
721 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8* %dst, i64 4
722 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
723 ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]]
724 ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
725 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size,
726 ; CHECK-NEXT: ret void
729 %a = alloca [42 x i8]
730 %ptr = getelementptr [42 x i8]* %a, i32 0, i32 0
731 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
732 %ptr2 = getelementptr [42 x i8]* %a, i32 0, i32 8
733 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
734 call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
735 %cast = bitcast i8* %ptr to i32*
736 store i32 42, i32* %cast
737 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
738 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)