1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; Single-field wrapper structs around 128-bit and 64-bit vector types.
; %type1 and %type2 are the pointee types used by @t1 and @t2; %type3 has
; no user in the visible part of this file.
5 %type1 = type { <16 x i8> }
6 %type2 = type { <8 x i8> }
7 %type3 = type { <4 x i16> }
; t1: load the %type1* stored in %argtable and store an all-zero <16 x i8>
; into field 0 of the pointed-to struct. The checks expect the pointer load
; from [x0] followed by a `str q0` through the loaded pointer.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so these checks cannot match another function.
10 define hidden fastcc void @t1(%type1** %argtable) nounwind {
13 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
14 ; CHECK: str q0, [x[[REG]]]
15 %tmp1 = load %type1** %argtable, align 8
16 %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
17 store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
; t2: 64-bit counterpart of t1. Loads the %type2* stored in %argtable and
; stores an all-zero <8 x i8> into field 0; the checks expect the pointer
; load from [x0] followed by a `str d0` through the loaded pointer.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so these checks cannot match another function.
21 define hidden fastcc void @t2(%type2** %argtable) nounwind {
24 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
25 ; CHECK: str d0, [x[[REG]]]
26 %tmp1 = load %type2** %argtable, align 8
27 %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
28 store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
32 ; add a bunch of tests for rdar://11246289
; Null-initialized global pointers to vectors, one per vector type. Each
; fct1_<shape> / fct2_<shape> test below loads its store base address from
; the integer-vector global matching its shape. The float/double globals are
; not referenced in the visible part of this file.
34 @globalArray64x2 = common global <2 x i64>* null, align 8
35 @globalArray32x4 = common global <4 x i32>* null, align 8
36 @globalArray16x8 = common global <8 x i16>* null, align 8
37 @globalArray8x16 = common global <16 x i8>* null, align 8
38 @globalArray64x1 = common global <1 x i64>* null, align 8
39 @globalArray32x2 = common global <2 x i32>* null, align 8
40 @globalArray16x4 = common global <4 x i16>* null, align 8
41 @globalArray8x8 = common global <8 x i8>* null, align 8
42 @floatglobalArray64x2 = common global <2 x double>* null, align 8
43 @floatglobalArray32x4 = common global <4 x float>* null, align 8
44 @floatglobalArray64x1 = common global <1 x double>* null, align 8
45 @floatglobalArray32x2 = common global <2 x float>* null, align 8
; fct1_64x2: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray64x2.
; Expected codegen: %offset is scaled once (lsl #4, 16-byte elements) and the
; scaled register is reused as the register offset of both the q-register
; load and the q-register store. The load pattern includes the closing
; bracket of the addressing mode, matching the sibling fct1_* tests.
47 define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
49 ; CHECK-LABEL: fct1_64x2:
50 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
51 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
52 ; CHECK: ldr [[BASE:x[0-9]+]],
53 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
54 %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
55 %tmp = load <2 x i64>* %arrayidx, align 16
56 %tmp1 = load <2 x i64>** @globalArray64x2, align 8
57 %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
58 store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
; fct2_64x2: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray64x2. With 16-byte elements the checks expect immediate
; offsets #48 (3*16) on the load and #80 (5*16) on the store.
62 define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
64 ; CHECK-LABEL: fct2_64x2:
65 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
66 ; CHECK: ldr [[BASE:x[0-9]+]],
67 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
68 %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
69 %tmp = load <2 x i64>* %arrayidx, align 16
70 %tmp1 = load <2 x i64>** @globalArray64x2, align 8
71 %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
72 store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
; fct1_32x4: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray32x4. The checks expect a single lsl #4 of
; the offset, reused as the register offset of the q-register load and store.
76 define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
78 ; CHECK-LABEL: fct1_32x4:
79 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
80 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
81 ; CHECK: ldr [[BASE:x[0-9]+]],
82 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
83 %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
84 %tmp = load <4 x i32>* %arrayidx, align 16
85 %tmp1 = load <4 x i32>** @globalArray32x4, align 8
86 %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
87 store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
; fct2_32x4: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray32x4. The checks expect immediate offsets #48 on the
; q-register load and #80 on the q-register store.
91 define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
93 ; CHECK-LABEL: fct2_32x4:
94 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
95 ; CHECK: ldr [[BASE:x[0-9]+]],
96 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
97 %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
98 %tmp = load <4 x i32>* %arrayidx, align 16
99 %tmp1 = load <4 x i32>** @globalArray32x4, align 8
100 %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
101 store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
; fct1_16x8: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray16x8. The checks expect a single lsl #4 of
; the offset, reused as the register offset of the q-register load and store.
105 define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
107 ; CHECK-LABEL: fct1_16x8:
108 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
109 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
110 ; CHECK: ldr [[BASE:x[0-9]+]],
111 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
112 %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
113 %tmp = load <8 x i16>* %arrayidx, align 16
114 %tmp1 = load <8 x i16>** @globalArray16x8, align 8
115 %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
116 store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
; fct2_16x8: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray16x8. The checks expect immediate offsets #48 on the
; q-register load and #80 on the q-register store.
120 define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
122 ; CHECK-LABEL: fct2_16x8:
123 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
124 ; CHECK: ldr [[BASE:x[0-9]+]],
125 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
126 %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
127 %tmp = load <8 x i16>* %arrayidx, align 16
128 %tmp1 = load <8 x i16>** @globalArray16x8, align 8
129 %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
130 store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
; fct1_8x16: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray8x16. The checks expect a single lsl #4 of
; the offset, reused as the register offset of the q-register load and store.
134 define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
136 ; CHECK-LABEL: fct1_8x16:
137 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
138 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
139 ; CHECK: ldr [[BASE:x[0-9]+]],
140 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
141 %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
142 %tmp = load <16 x i8>* %arrayidx, align 16
143 %tmp1 = load <16 x i8>** @globalArray8x16, align 8
144 %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
145 store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
; fct2_8x16: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray8x16. The checks expect immediate offsets #48 on the
; q-register load and #80 on the q-register store.
149 define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
151 ; CHECK-LABEL: fct2_8x16:
152 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
153 ; CHECK: ldr [[BASE:x[0-9]+]],
154 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
155 %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
156 %tmp = load <16 x i8>* %arrayidx, align 16
157 %tmp1 = load <16 x i8>** @globalArray8x16, align 8
158 %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
159 store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
; fct1_64x1: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray64x1. With 8-byte elements the checks
; expect a single lsl #3 of the offset, reused as the register offset of the
; d-register load and store.
163 define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
165 ; CHECK-LABEL: fct1_64x1:
166 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
167 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
168 ; CHECK: ldr [[BASE:x[0-9]+]],
169 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
170 %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
171 %tmp = load <1 x i64>* %arrayidx, align 8
172 %tmp1 = load <1 x i64>** @globalArray64x1, align 8
173 %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
174 store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
; fct2_64x1: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray64x1. With 8-byte elements the checks expect immediate
; offsets #24 (3*8) on the d-register load and #40 (5*8) on the store.
178 define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
180 ; CHECK-LABEL: fct2_64x1:
181 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
182 ; CHECK: ldr [[BASE:x[0-9]+]],
183 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
184 %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
185 %tmp = load <1 x i64>* %arrayidx, align 8
186 %tmp1 = load <1 x i64>** @globalArray64x1, align 8
187 %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
188 store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
; fct1_32x2: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray32x2. The checks expect a single lsl #3 of
; the offset, reused as the register offset of the d-register load and store.
192 define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
194 ; CHECK-LABEL: fct1_32x2:
195 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
196 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
197 ; CHECK: ldr [[BASE:x[0-9]+]],
198 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
199 %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
200 %tmp = load <2 x i32>* %arrayidx, align 8
201 %tmp1 = load <2 x i32>** @globalArray32x2, align 8
202 %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
203 store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
; fct2_32x2: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray32x2. The checks expect immediate offsets #24 on the
; d-register load and #40 on the d-register store.
207 define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
209 ; CHECK-LABEL: fct2_32x2:
210 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
211 ; CHECK: ldr [[BASE:x[0-9]+]],
212 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
213 %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
214 %tmp = load <2 x i32>* %arrayidx, align 8
215 %tmp1 = load <2 x i32>** @globalArray32x2, align 8
216 %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
217 store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
; fct1_16x4: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray16x4. The checks expect a single lsl #3 of
; the offset, reused as the register offset of the d-register load and store.
221 define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
223 ; CHECK-LABEL: fct1_16x4:
224 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
225 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
226 ; CHECK: ldr [[BASE:x[0-9]+]],
227 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
228 %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
229 %tmp = load <4 x i16>* %arrayidx, align 8
230 %tmp1 = load <4 x i16>** @globalArray16x4, align 8
231 %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
232 store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
; fct2_16x4: copy element 3 of %array into element 5 of the array pointed to
; by @globalArray16x4. The checks expect immediate offsets #24 on the
; d-register load and #40 on the d-register store.
236 define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
238 ; CHECK-LABEL: fct2_16x4:
239 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
240 ; CHECK: ldr [[BASE:x[0-9]+]],
241 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
242 %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
243 %tmp = load <4 x i16>* %arrayidx, align 8
244 %tmp1 = load <4 x i16>** @globalArray16x4, align 8
245 %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
246 store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
; fct1_8x8: copy element %offset of %array into element %offset of the
; array pointed to by @globalArray8x8. The checks expect a single lsl #3 of
; the offset, reused as the register offset of the d-register load and store.
250 define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
252 ; CHECK-LABEL: fct1_8x8:
253 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
254 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
255 ; CHECK: ldr [[BASE:x[0-9]+]],
256 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
257 %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
258 %tmp = load <8 x i8>* %arrayidx, align 8
259 %tmp1 = load <8 x i8>** @globalArray8x8, align 8
260 %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
261 store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
265 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
266 ; registers for unscaled vector accesses
; 63-byte string global used as raw backing storage: the tests that follow
; reinterpret bytes starting at offsets 3 and 4 as 64-bit and 128-bit vectors,
; i.e. at offsets that are not multiples of the vector size.
267 @str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
; fct0: load a <1 x i64> from byte offset 3 of @str. The check expects an
; unscaled ldur into a d register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
269 define <1 x i64> @fct0() nounwind readonly ssp {
272 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
273 %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
; fct1: load a <2 x i32> from byte offset 3 of @str. The check expects an
; unscaled ldur into a d register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
277 define <2 x i32> @fct1() nounwind readonly ssp {
280 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
281 %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
; fct2: load a <4 x i16> from byte offset 3 of @str. The check expects an
; unscaled ldur into a d register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
285 define <4 x i16> @fct2() nounwind readonly ssp {
288 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
289 %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
; fct3: load a <8 x i8> from byte offset 3 of @str. The check expects an
; unscaled ldur into a d register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
293 define <8 x i8> @fct3() nounwind readonly ssp {
296 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
297 %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
; fct4: load a <2 x i64> from byte offset 3 of @str. The check expects an
; unscaled ldur into a q register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
301 define <2 x i64> @fct4() nounwind readonly ssp {
304 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
305 %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
; fct5: load a <4 x i32> from byte offset 3 of @str. The check expects an
; unscaled ldur into a q register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
309 define <4 x i32> @fct5() nounwind readonly ssp {
312 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
313 %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
; fct6: load a <8 x i16> from byte offset 3 of @str. The check expects an
; unscaled ldur into a q register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
317 define <8 x i16> @fct6() nounwind readonly ssp {
320 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
321 %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
; fct7: load a <16 x i8> from byte offset 3 of @str. The check expects an
; unscaled ldur into a q register with immediate offset #3.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so this check cannot match another function.
325 define <16 x i8> @fct7() nounwind readonly ssp {
328 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
329 %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
; fct8: load a <1 x i64> from byte offset 3 of @str and store it back at byte
; offset 4. The checks expect an ldur/stur pair that shares one base register
; and one d register, with immediates #3 and #4.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so these checks cannot match another function.
333 define void @fct8() nounwind ssp {
336 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
337 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
338 %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
339 store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
; fct9: load a <2 x i32> from byte offset 3 of @str and store it back at byte
; offset 4. The checks expect an ldur/stur pair that shares one base register
; and one d register, with immediates #3 and #4.
; NOTE(review): no function-label directive is visible for this test in this
; excerpt; confirm one exists so these checks cannot match another function.
343 define void @fct9() nounwind ssp {
346 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
347 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
348 %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
349 store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
; fct10: load a <4 x i16> from byte offset 3 of @str and store it back at
; byte offset 4. The checks expect an ldur/stur pair that shares one base
; register and one d register, with immediates #3 and #4.
353 define void @fct10() nounwind ssp {
355 ; CHECK-LABEL: fct10:
356 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
357 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
358 %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
359 store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
; fct11: load a <8 x i8> from byte offset 3 of @str and store it back at byte
; offset 4. The checks expect an ldur/stur pair that shares one base register
; and one d register, with immediates #3 and #4.
363 define void @fct11() nounwind ssp {
365 ; CHECK-LABEL: fct11:
366 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
367 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
368 %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
369 store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
; fct12: load a <2 x i64> from byte offset 3 of @str and store it back at
; byte offset 4. The checks expect an ldur/stur pair that shares one base
; register and one q register, with immediates #3 and #4.
373 define void @fct12() nounwind ssp {
375 ; CHECK-LABEL: fct12:
376 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
377 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
378 %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
379 store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
; fct13: load a <4 x i32> from byte offset 3 of @str and store it back at
; byte offset 4. The checks expect an ldur/stur pair that shares one base
; register and one q register, with immediates #3 and #4.
383 define void @fct13() nounwind ssp {
385 ; CHECK-LABEL: fct13:
386 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
387 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
388 %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
389 store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
; fct14: load a <8 x i16> from byte offset 3 of @str and store it back at
; byte offset 4. The checks expect an ldur/stur pair that shares one base
; register and one q register, with immediates #3 and #4.
393 define void @fct14() nounwind ssp {
395 ; CHECK-LABEL: fct14:
396 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
397 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
398 %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
399 store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
; fct15: load a <16 x i8> from byte offset 3 of @str and store it back at
; byte offset 4. The checks expect an ldur/stur pair that shares one base
; register and one q register, with immediates #3 and #4.
403 define void @fct15() nounwind ssp {
405 ; CHECK-LABEL: fct15:
406 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
407 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
408 %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
409 store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
413 ; Check the building of vector from a single loaded value.
414 ; Part of <rdar://problem/14170854>
416 ; Single loads with immediate offset.
; fct16: load the i8 at element index 1 of %sp0, insert it into lane 0 of an
; undef <8 x i8>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into a b register (immediate offset #1) and
; the very next instruction to be the mul.8b that consumes that register.
417 define <8 x i8> @fct16(i8* nocapture %sp0) {
418 ; CHECK-LABEL: fct16:
419 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
420 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
422 %addr = getelementptr i8* %sp0, i64 1
423 %pix_sp0.0.copyload = load i8* %addr, align 1
424 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
425 %vmull.i = mul <8 x i8> %vec, %vec
426 ret <8 x i8> %vmull.i
; fct17: load the i8 at element index 1 of %sp0, insert it into lane 0 of an
; undef <16 x i8>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into a b register (immediate offset #1) and
; the very next instruction to be the mul.16b that consumes that register.
429 define <16 x i8> @fct17(i8* nocapture %sp0) {
430 ; CHECK-LABEL: fct17:
431 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
432 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
434 %addr = getelementptr i8* %sp0, i64 1
435 %pix_sp0.0.copyload = load i8* %addr, align 1
436 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
437 %vmull.i = mul <16 x i8> %vec, %vec
438 ret <16 x i8> %vmull.i
; fct18: load the i16 at element index 1 of %sp0, insert it into lane 0 of an
; undef <4 x i16>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into an h register (immediate offset #2) and
; the very next instruction to be the mul.4h that consumes that register.
441 define <4 x i16> @fct18(i16* nocapture %sp0) {
442 ; CHECK-LABEL: fct18:
443 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
444 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
446 %addr = getelementptr i16* %sp0, i64 1
447 %pix_sp0.0.copyload = load i16* %addr, align 1
448 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
449 %vmull.i = mul <4 x i16> %vec, %vec
450 ret <4 x i16> %vmull.i
; fct19: load the i16 at element index 1 of %sp0, insert it into lane 0 of an
; undef <8 x i16>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into an h register (immediate offset #2) and
; the very next instruction to be the mul.8h that consumes that register.
453 define <8 x i16> @fct19(i16* nocapture %sp0) {
454 ; CHECK-LABEL: fct19:
455 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
456 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
458 %addr = getelementptr i16* %sp0, i64 1
459 %pix_sp0.0.copyload = load i16* %addr, align 1
460 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
461 %vmull.i = mul <8 x i16> %vec, %vec
462 ret <8 x i16> %vmull.i
; fct20: load the i32 at element index 1 of %sp0, insert it into lane 0 of an
; undef <2 x i32>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into an s register (immediate offset #4) and
; the very next instruction to be the mul.2s that consumes that register.
465 define <2 x i32> @fct20(i32* nocapture %sp0) {
466 ; CHECK-LABEL: fct20:
467 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
468 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
470 %addr = getelementptr i32* %sp0, i64 1
471 %pix_sp0.0.copyload = load i32* %addr, align 1
472 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
473 %vmull.i = mul <2 x i32> %vec, %vec
474 ret <2 x i32> %vmull.i
; fct21: load the i32 at element index 1 of %sp0, insert it into lane 0 of an
; undef <4 x i32>, and multiply that vector by itself. The checks expect the
; scalar to be loaded straight into an s register (immediate offset #4) and
; the very next instruction to be the mul.4s that consumes that register.
477 define <4 x i32> @fct21(i32* nocapture %sp0) {
478 ; CHECK-LABEL: fct21:
479 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
480 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
482 %addr = getelementptr i32* %sp0, i64 1
483 %pix_sp0.0.copyload = load i32* %addr, align 1
484 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
485 %vmull.i = mul <4 x i32> %vec, %vec
486 ret <4 x i32> %vmull.i
; fct22: load the i64 at element index 1 of %sp0 and insert it into lane 0 of
; an undef <1 x i64>. The check expects the value to be loaded directly into
; d0 with immediate offset #8.
489 define <1 x i64> @fct22(i64* nocapture %sp0) {
490 ; CHECK-LABEL: fct22:
491 ; CHECK: ldr d0, [x0, #8]
493 %addr = getelementptr i64* %sp0, i64 1
494 %pix_sp0.0.copyload = load i64* %addr, align 1
495 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
; fct23: load the i64 at element index 1 of %sp0 and insert it into lane 0 of
; an undef <2 x i64>. The check expects the value to be loaded directly into
; a d register with immediate offset #8.
499 define <2 x i64> @fct23(i64* nocapture %sp0) {
500 ; CHECK-LABEL: fct23:
501 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
503 %addr = getelementptr i64* %sp0, i64 1
504 %pix_sp0.0.copyload = load i64* %addr, align 1
505 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
510 ; Single loads with register offset.
; fct24: load the i8 at element index %offset of %sp0, insert it into lane 0
; of an undef <8 x i8>, and multiply that vector by itself. The checks expect
; a register-offset load into a b register ([x0, x1], no shift) immediately
; followed by the mul.8b that consumes that register.
511 define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
512 ; CHECK-LABEL: fct24:
513 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
514 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
516 %addr = getelementptr i8* %sp0, i64 %offset
517 %pix_sp0.0.copyload = load i8* %addr, align 1
518 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
519 %vmull.i = mul <8 x i8> %vec, %vec
520 ret <8 x i8> %vmull.i
; fct25: load the i8 at element index %offset of %sp0, insert it into lane 0
; of an undef <16 x i8>, and multiply that vector by itself. The checks
; expect a register-offset load into a b register ([x0, x1], no shift)
; immediately followed by the mul.16b that consumes that register.
523 define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
524 ; CHECK-LABEL: fct25:
525 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
526 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
528 %addr = getelementptr i8* %sp0, i64 %offset
529 %pix_sp0.0.copyload = load i8* %addr, align 1
530 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
531 %vmull.i = mul <16 x i8> %vec, %vec
532 ret <16 x i8> %vmull.i
; fct26: load the i16 at element index %offset of %sp0, insert it into lane 0
; of an undef <4 x i16>, and multiply that vector by itself. The checks
; expect a register-offset load into an h register with the index scaled by
; lsl #1, immediately followed by the mul.4h that consumes that register.
535 define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
536 ; CHECK-LABEL: fct26:
537 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
538 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
540 %addr = getelementptr i16* %sp0, i64 %offset
541 %pix_sp0.0.copyload = load i16* %addr, align 1
542 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
543 %vmull.i = mul <4 x i16> %vec, %vec
544 ret <4 x i16> %vmull.i
; fct27: load the i16 at element index %offset of %sp0, insert it into lane 0
; of an undef <8 x i16>, and multiply that vector by itself. The checks
; expect a register-offset load into an h register with the index scaled by
; lsl #1, immediately followed by the mul.8h that consumes that register.
547 define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
548 ; CHECK-LABEL: fct27:
549 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
550 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
552 %addr = getelementptr i16* %sp0, i64 %offset
553 %pix_sp0.0.copyload = load i16* %addr, align 1
554 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
555 %vmull.i = mul <8 x i16> %vec, %vec
556 ret <8 x i16> %vmull.i
; fct28: load the i32 at element index %offset of %sp0, insert it into lane 0
; of an undef <2 x i32>, and multiply that vector by itself. The checks
; expect a register-offset load into an s register with the index scaled by
; lsl #2, immediately followed by the mul.2s that consumes that register.
559 define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
560 ; CHECK-LABEL: fct28:
561 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
562 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
564 %addr = getelementptr i32* %sp0, i64 %offset
565 %pix_sp0.0.copyload = load i32* %addr, align 1
566 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
567 %vmull.i = mul <2 x i32> %vec, %vec
568 ret <2 x i32> %vmull.i
; fct29: load the i32 at element index %offset of %sp0, insert it into lane 0
; of an undef <4 x i32>, and multiply that vector by itself. The checks
; expect a register-offset load into an s register with the index scaled by
; lsl #2, immediately followed by the mul.4s that consumes that register.
571 define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
572 ; CHECK-LABEL: fct29:
573 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
574 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
576 %addr = getelementptr i32* %sp0, i64 %offset
577 %pix_sp0.0.copyload = load i32* %addr, align 1
578 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
579 %vmull.i = mul <4 x i32> %vec, %vec
580 ret <4 x i32> %vmull.i
; fct30: load the i64 at element index %offset of %sp0 and insert it into
; lane 0 of an undef <1 x i64>. The check expects a register-offset load
; directly into d0 with the index scaled by lsl #3.
583 define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
584 ; CHECK-LABEL: fct30:
585 ; CHECK: ldr d0, [x0, x1, lsl #3]
587 %addr = getelementptr i64* %sp0, i64 %offset
588 %pix_sp0.0.copyload = load i64* %addr, align 1
589 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
593 define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
594 ; CHECK-LABEL: fct31:
595 ; CHECK: ldr d0, [x0, x1, lsl #3]
597 %addr = getelementptr i64* %sp0, i64 %offset
598 %pix_sp0.0.copyload = load i64* %addr, align 1
599 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0