1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Inserts the i8 scalar %tmp2 into lane 15 of a <16 x i8> vector; the CHECK
; expects an ins into .b[15] sourced from a W register.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the i16 scalar %tmp2 into lane 6 of an <8 x i16> vector; the CHECK
; expects an ins into .h[6] sourced from a W register.
11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12 ; CHECK-LABEL: ins8hw:
13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the i32 scalar %tmp2 into lane 2 of a <4 x i32> vector; the CHECK
; expects an ins into .s[2] sourced from a W register.
18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19 ; CHECK-LABEL: ins4sw:
20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the i64 scalar %tmp2 into lane 1 of a <2 x i64> vector; the CHECK
; expects an ins into .d[1] sourced from an X register.
25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26 ; CHECK-LABEL: ins2dw:
27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Inserts the i8 scalar %tmp2 into lane 5 of an <8 x i8> vector; the CHECK
; expects an ins into .b[5] sourced from a W register.
32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33 ; CHECK-LABEL: ins8bw:
34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Inserts the i16 scalar %tmp2 into lane 3 of a <4 x i16> vector; the CHECK
; expects an ins into .h[3] sourced from a W register.
39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40 ; CHECK-LABEL: ins4hw:
41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Inserts the i32 scalar %tmp2 into lane 1 of a <2 x i32> vector; the CHECK
; expects an ins into .s[1] sourced from a W register.
46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47 ; CHECK-LABEL: ins2sw:
48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of %tmp1 into lane 15 of %tmp2 (both <16 x i8>); the CHECK
; expects a single element-to-element ins.
53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54 ; CHECK-LABEL: ins16b16:
55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
57 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of %tmp1 into lane 7 of %tmp2 (both <8 x i16>); the CHECK
; expects a single element-to-element ins.
61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62 ; CHECK-LABEL: ins8h8:
63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
65 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x i32>); the CHECK
; expects a single element-to-element ins.
69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70 ; CHECK-LABEL: ins4s4:
71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
73 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i64>); the CHECK
; expects a single element-to-element ins.
77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78 ; CHECK-LABEL: ins2d2:
79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
81 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Floating-point variant: copies lane 2 of %tmp1 into lane 1 of %tmp2 (both
; <4 x float>); the CHECK expects an element-to-element ins on .s lanes.
85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86 ; CHECK-LABEL: ins4f4:
87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88 %tmp3 = extractelement <4 x float> %tmp1, i32 2
89 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
; Floating-point variant: copies lane 0 of %tmp1 into lane 1 of %tmp2 (both
; <2 x double>) and returns the result; the CHECK expects an ins on .d lanes.
93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94 ; CHECK-LABEL: ins2df2:
95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96 %tmp3 = extractelement <2 x double> %tmp1, i32 0
97 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
98 ret <2 x double> %tmp4
; Copies lane 2 of the 64-bit vector %tmp1 (<8 x i8>) into lane 15 of the
; 128-bit vector %tmp2 (<16 x i8>); the CHECK expects an element ins.
101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102 ; CHECK-LABEL: ins8b16:
103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
105 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the 64-bit vector %tmp1 (<4 x i16>) into lane 7 of the
; 128-bit vector %tmp2 (<8 x i16>); the CHECK expects an element ins.
109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110 ; CHECK-LABEL: ins4h8:
111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
113 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of the 64-bit vector %tmp1 (<2 x i32>) into lane 1 of the
; 128-bit vector %tmp2 (<4 x i32>); the CHECK expects an element ins.
117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118 ; CHECK-LABEL: ins2s4:
119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
121 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of %tmp1 (<1 x i64>) into lane 1 of %tmp2 (<2 x i64>);
; the CHECK expects an element ins from .d[0] to .d[1].
125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126 ; CHECK-LABEL: ins1d2:
127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
129 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 1 of %tmp1 (<2 x float>) into lane 1 of %tmp2 (<4 x float>) and
; returns the result; the CHECK expects an element ins on .s lanes.
133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134 ; CHECK-LABEL: ins2f4:
135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136 %tmp3 = extractelement <2 x float> %tmp1, i32 1
137 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
138 ret <4 x float> %tmp4
; Copies the only lane of %tmp1 (<1 x double>) into lane 1 of %tmp2
; (<2 x double>) and returns the result; the CHECK expects an ins on .d lanes.
141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142 ; CHECK-LABEL: ins1f2:
143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144 %tmp3 = extractelement <1 x double> %tmp1, i32 0
145 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
146 ret <2 x double> %tmp4
; Copies lane 2 of the 128-bit vector %tmp1 (<16 x i8>) into lane 7 of the
; 64-bit vector %tmp2 (<8 x i8>); the CHECK expects an element ins.
149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150 ; CHECK-LABEL: ins16b8:
151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
153 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of the 128-bit vector %tmp1 (<8 x i16>) into lane 3 of the
; 64-bit vector %tmp2 (<4 x i16>); the CHECK expects an element ins.
157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158 ; CHECK-LABEL: ins8h4:
159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
161 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of the 128-bit vector %tmp1 (<4 x i32>) into lane 1 of the
; 64-bit vector %tmp2 (<2 x i32>); the CHECK expects an element ins.
165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166 ; CHECK-LABEL: ins4s2:
167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
169 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of %tmp1 (<2 x i64>) into the only lane of %tmp2 (<1 x i64>);
; the CHECK expects an ins from .d[0] to .d[0].
173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174 ; CHECK-LABEL: ins2d1:
175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
177 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 2 of %tmp1 (<4 x float>) into lane 1 of %tmp2 (<2 x float>) and
; returns the result; the CHECK expects an element ins on .s lanes.
181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182 ; CHECK-LABEL: ins4f2:
183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184 %tmp3 = extractelement <4 x float> %tmp1, i32 2
185 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
186 ret <2 x float> %tmp4
; Takes lane 1 of %tmp1 (<2 x double>) and places it in the only lane of a
; <1 x double> result. Unlike the sibling tests, the CHECK here expects a mov
; into a D register from .d[1] rather than an ins.
189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190 ; CHECK-LABEL: ins2f1:
191 ; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
192 %tmp3 = extractelement <2 x double> %tmp1, i32 1
193 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
194 ret <1 x double> %tmp4
; Copies lane 2 of %tmp1 into lane 4 of %tmp2 (both <8 x i8>); the CHECK
; expects a single element-to-element ins.
197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198 ; CHECK-LABEL: ins8b8:
199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
201 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of %tmp1 into lane 3 of %tmp2 (both <4 x i16>); the CHECK
; expects a single element-to-element ins.
205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206 ; CHECK-LABEL: ins4h4:
207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
209 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i32>); the CHECK
; expects a single element-to-element ins.
213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214 ; CHECK-LABEL: ins2s2:
215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
217 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of %tmp1 into the only lane of %tmp2 (both <1 x i64>);
; the CHECK expects an ins from .d[0] to .d[0].
221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222 ; CHECK-LABEL: ins1d1:
223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
225 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x float>) and returns
; the result; the CHECK expects an element ins on .s lanes.
229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230 ; CHECK-LABEL: ins2f2:
231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232 %tmp3 = extractelement <2 x float> %tmp1, i32 0
233 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
234 ret <2 x float> %tmp4
; Copies the only lane of %tmp1 into the only lane of %tmp2 (both
; <1 x double>). This is a negative test: the CHECK-NOT requires that no
; vector ins is emitted for it.
237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238 ; CHECK-LABEL: ins1df1:
239 ; CHECK-NOT: ins {{v[0-9]+}}
240 %tmp3 = extractelement <1 x double> %tmp1, i32 0
241 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
242 ret <1 x double> %tmp4
; Extracts lane 8 of a <16 x i8> vector and zero-extends it to i32; the CHECK
; expects a umov into a W register from .b[8].
245 define i32 @umovw16b(<16 x i8> %tmp1) {
246 ; CHECK-LABEL: umovw16b:
247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
249 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of an <8 x i16> vector and zero-extends it to i32; the CHECK
; expects a umov into a W register from .h[2].
253 define i32 @umovw8h(<8 x i16> %tmp1) {
254 ; CHECK-LABEL: umovw8h:
255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
257 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32> vector (no extension needed); the CHECK
; expects a mov into a W register from .s[2].
261 define i32 @umovw4s(<4 x i32> %tmp1) {
262 ; CHECK-LABEL: umovw4s:
263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of a <2 x i64> vector; the CHECK expects a mov into an X
; register from .d[1].
268 define i64 @umovx2d(<2 x i64> %tmp1) {
269 ; CHECK-LABEL: umovx2d:
270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Extracts lane 7 of an <8 x i8> vector and zero-extends it to i32.
; The mnemonic is spelled out as umov: FileCheck matches substrings, so a bare
; "mov {{w...}}" pattern would also accept an "smov" line and let a wrong
; sign-extending extract slip through. This also matches umovw16b/umovw8h.
275 define i32 @umovw8b(<8 x i8> %tmp1) {
276 ; CHECK-LABEL: umovw8b:
277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> vector and zero-extends it to i32.
; The mnemonic is spelled out as umov: FileCheck matches substrings, so a bare
; "mov {{w...}}" pattern would also accept an "smov" line and let a wrong
; sign-extending extract slip through. This also matches umovw16b/umovw8h.
283 define i32 @umovw4h(<4 x i16> %tmp1) {
284 ; CHECK-LABEL: umovw4h:
285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32> vector; the CHECK expects a mov into a W
; register from .s[1].
291 define i32 @umovw2s(<2 x i32> %tmp1) {
292 ; CHECK-LABEL: umovw2s:
293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the only lane of a <1 x i64> vector; the CHECK expects a plain fmov
; from a D register to an X register rather than a lane move.
298 define i64 @umovx1d(<1 x i64> %tmp1) {
299 ; CHECK-LABEL: umovx1d:
300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of a <16 x i8> vector, sign-extends it to i32 and adds the
; value to itself; the CHECK expects an smov into a W register from .b[8].
305 define i32 @smovw16b(<16 x i8> %tmp1) {
306 ; CHECK-LABEL: smovw16b:
307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
309 %tmp4 = sext i8 %tmp3 to i32
310 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16> vector, sign-extends it to i32 and adds the
; value to itself; the CHECK expects an smov into a W register from .h[2].
314 define i32 @smovw8h(<8 x i16> %tmp1) {
315 ; CHECK-LABEL: smovw8h:
316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
318 %tmp4 = sext i16 %tmp3 to i32
319 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of a <16 x i8> vector and sign-extends it to i64; the CHECK
; expects an smov into an X register from .b[8].
323 define i64 @smovx16b(<16 x i8> %tmp1) {
324 ; CHECK-LABEL: smovx16b:
325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
326 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
327 %tmp4 = sext i8 %tmp3 to i64
; Extracts lane 2 of an <8 x i16> vector and sign-extends it to i64; the CHECK
; expects an smov into an X register from .h[2].
331 define i64 @smovx8h(<8 x i16> %tmp1) {
332 ; CHECK-LABEL: smovx8h:
333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
334 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
335 %tmp4 = sext i16 %tmp3 to i64
; Extracts lane 2 of a <4 x i32> vector and sign-extends it to i64; the CHECK
; expects an smov into an X register from .s[2].
339 define i64 @smovx4s(<4 x i32> %tmp1) {
340 ; CHECK-LABEL: smovx4s:
341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
342 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
343 %tmp4 = sext i32 %tmp3 to i64
; Extracts lane 4 of an <8 x i8> vector, sign-extends it to i32 and adds the
; value to itself; the CHECK expects an smov into a W register from .b[4].
347 define i32 @smovw8b(<8 x i8> %tmp1) {
348 ; CHECK-LABEL: smovw8b:
349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
350 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
351 %tmp4 = sext i8 %tmp3 to i32
352 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of a <4 x i16> vector, sign-extends it to i32 and adds the
; value to itself; the CHECK expects an smov into a W register from .h[2].
356 define i32 @smovw4h(<4 x i16> %tmp1) {
357 ; CHECK-LABEL: smovw4h:
358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
359 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
360 %tmp4 = sext i16 %tmp3 to i32
361 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 6 of an <8 x i8> vector and sign-extends it to i32. The CHECK
; accepts an smov into either an X or a W register from .b[6].
365 define i32 @smovx8b(<8 x i8> %tmp1) {
366 ; CHECK-LABEL: smovx8b:
367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
368 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
369 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> vector and sign-extends it to i32. The CHECK
; accepts an smov into either an X or a W register from .h[2].
373 define i32 @smovx4h(<4 x i16> %tmp1) {
374 ; CHECK-LABEL: smovx4h:
375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
376 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
377 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32> vector and sign-extends it to i64; the CHECK
; expects an smov into an X register from .s[1].
381 define i64 @smovx2s(<2 x i32> %tmp1) {
382 ; CHECK-LABEL: smovx2s:
383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
384 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
385 %tmp4 = sext i32 %tmp3 to i64
; Lane copy expressed as a shuffle: the mask keeps every lane of %v1 except
; lane 5, which takes mask index 11 (lane 3 of %v2). The CHECK expects this to
; become a single ins from .b[3] into .b[5].
389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
390 ; CHECK-LABEL: test_vcopy_lane_s8:
391 ; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
392 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
393 ret <8 x i8> %vset_lane
; Lane copy expressed as a shuffle: the mask keeps every lane of %v1 except
; lane 14, which takes mask index 22 (lane 6 of %v2). The CHECK expects a
; single ins from .b[6] into .b[14].
396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
398 ; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
399 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
400 ret <16 x i8> %vset_lane
; Swapped-operand lane copy: mask indices 8..14 take lanes 0..6 from %v2 and
; the last lane takes index 0 (lane 0 of %v1). The CHECK expects a single ins
; from .b[0] into .b[7].
403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
405 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
406 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
407 ret <8 x i8> %vset_lane
; Swapped-operand lane copy: lane 0 takes mask index 15 (lane 15 of %v1) and
; indices 17..31 take lanes 1..15 from %v2. The CHECK expects a single ins
; from .b[15] into .b[0].
410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
412 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
413 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
414 ret <16 x i8> %vset_lane
; Builds an <8 x i8> vector by inserting the same scalar %v1 into all eight
; lanes; the CHECK expects the chain to collapse into one dup from a W register.
417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
418 ; CHECK-LABEL: test_vdup_n_u8:
419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
420 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
421 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
422 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
423 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
424 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
425 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
426 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
427 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
428 ret <8 x i8> %vecinit7.i
; Builds a <4 x i16> vector by inserting the same scalar %v1 into all four
; lanes; the CHECK expects one dup from a W register.
431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
432 ; CHECK-LABEL: test_vdup_n_u16:
433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
434 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
435 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
436 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
437 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
438 ret <4 x i16> %vecinit3.i
; Builds a <2 x i32> vector by inserting the same scalar %v1 into both lanes;
; the CHECK expects one dup from a W register.
441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
442 ; CHECK-LABEL: test_vdup_n_u32:
443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
444 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
445 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
446 ret <2 x i32> %vecinit1.i
; Single-lane case: inserts %v1 into the only lane of a <1 x i64> vector. The
; CHECK expects a plain fmov from an X register to a D register, not a dup.
449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
450 ; CHECK-LABEL: test_vdup_n_u64:
451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
452 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
453 ret <1 x i64> %vecinit.i
; Builds a <16 x i8> vector by inserting the same scalar %v1 into all sixteen
; lanes; the CHECK expects the chain to collapse into one dup from a W register.
456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
457 ; CHECK-LABEL: test_vdupq_n_u8:
458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
459 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
460 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
461 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
462 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
463 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
464 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
465 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
466 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
467 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
468 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
469 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
470 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
471 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
472 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
473 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
474 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
475 ret <16 x i8> %vecinit15.i
; Builds an <8 x i16> vector by inserting the same scalar %v1 into all eight
; lanes; the CHECK expects one dup from a W register.
478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
479 ; CHECK-LABEL: test_vdupq_n_u16:
480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
481 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
482 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
483 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
484 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
485 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
486 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
487 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
488 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
489 ret <8 x i16> %vecinit7.i
; Builds a <4 x i32> vector by inserting the same scalar %v1 into all four
; lanes; the CHECK expects one dup from a W register.
492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
493 ; CHECK-LABEL: test_vdupq_n_u32:
494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
495 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
496 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
497 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
498 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
499 ret <4 x i32> %vecinit3.i
; Builds a <2 x i64> vector by inserting the same scalar %v1 into both lanes;
; the CHECK expects one dup from an X register.
502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
503 ; CHECK-LABEL: test_vdupq_n_u64:
504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
505 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
506 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
507 ret <2 x i64> %vecinit1.i
; Splats lane 5 of an <8 x i8> vector across all eight lanes via a shuffle;
; the CHECK expects a dup by element from .b[5].
510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
511 ; CHECK-LABEL: test_vdup_lane_s8:
512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
513 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
514 ret <8 x i8> %shuffle
; Splats lane 2 of a <4 x i16> vector across all four lanes via a shuffle;
; the CHECK expects a dup by element from .h[2].
517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
518 ; CHECK-LABEL: test_vdup_lane_s16:
519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
520 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
521 ret <4 x i16> %shuffle
; Splats lane 1 of a <2 x i32> vector across both lanes via a shuffle; the
; CHECK expects a dup by element from .s[1].
524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
525 ; CHECK-LABEL: test_vdup_lane_s32:
526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
527 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
528 ret <2 x i32> %shuffle
; Splats lane 5 of a 64-bit <8 x i8> vector across a 128-bit <16 x i8> result.
; The CHECK names the dup mnemonic explicitly, like the other dup-by-element
; tests in this file, instead of matching on the operand list alone.
531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
532 ; CHECK-LABEL: test_vdupq_lane_s8:
533 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
534 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
535 ret <16 x i8> %shuffle
; Splats lane 2 of a 64-bit <4 x i16> vector across a 128-bit <8 x i16> result.
; The CHECK names the dup mnemonic explicitly, like the other dup-by-element
; tests in this file, instead of matching on the operand list alone.
538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
539 ; CHECK-LABEL: test_vdupq_lane_s16:
540 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
541 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
542 ret <8 x i16> %shuffle
; Splats lane 1 of a 64-bit <2 x i32> vector across a 128-bit <4 x i32> result.
; The CHECK names the dup mnemonic explicitly, like the other dup-by-element
; tests in this file, instead of matching on the operand list alone.
545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
546 ; CHECK-LABEL: test_vdupq_lane_s32:
547 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
548 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
549 ret <4 x i32> %shuffle
; Splats the only lane of a <1 x i64> vector across a <2 x i64> result (the
; shuffle mask is zeroinitializer, i.e. index 0 for both lanes).
; The CHECK names the dup mnemonic explicitly, like the other dup-by-element
; tests in this file, instead of matching on the operand list alone.
552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
553 ; CHECK-LABEL: test_vdupq_lane_s64:
554 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
555 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
556 ret <2 x i64> %shuffle
; Splats lane 5 of a 128-bit <16 x i8> vector into a 64-bit <8 x i8> result;
; the CHECK expects a dup by element from .b[5].
559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
560 ; CHECK-LABEL: test_vdup_laneq_s8:
561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
562 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
563 ret <8 x i8> %shuffle
; Splats lane 2 of a 128-bit <8 x i16> vector into a 64-bit <4 x i16> result;
; the CHECK expects a dup by element from .h[2].
566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
567 ; CHECK-LABEL: test_vdup_laneq_s16:
568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
569 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
570 ret <4 x i16> %shuffle
; Splats lane 1 of a 128-bit <4 x i32> vector into a 64-bit <2 x i32> result;
; the CHECK expects a dup by element from .s[1].
573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
574 ; CHECK-LABEL: test_vdup_laneq_s32:
575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
576 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
577 ret <2 x i32> %shuffle
; Splats lane 5 of a <16 x i8> vector across all sixteen lanes; the CHECK
; expects a dup by element from .b[5].
580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
581 ; CHECK-LABEL: test_vdupq_laneq_s8:
582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
583 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
584 ret <16 x i8> %shuffle
; Splats lane 2 of an <8 x i16> vector across all eight lanes.
; The CHECK names the dup mnemonic explicitly, matching the neighbouring
; laneq_s8/s32/s64 tests, instead of matching on the operand list alone.
587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
588 ; CHECK-LABEL: test_vdupq_laneq_s16:
589 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
590 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
591 ret <8 x i16> %shuffle
; Splats lane 1 of a <4 x i32> vector across all four lanes; the CHECK expects
; a dup by element from .s[1].
594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
595 ; CHECK-LABEL: test_vdupq_laneq_s32:
596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
597 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
598 ret <4 x i32> %shuffle
; Splats lane 0 of a <2 x i64> vector across both lanes (zeroinitializer
; mask); the CHECK expects a dup by element from .d[0].
601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
602 ; CHECK-LABEL: test_vdupq_laneq_s64:
603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
604 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
605 ret <2 x i64> %shuffle
; Bitcasts an <8 x i8> vector to a scalar i64; the CHECK expects an fmov from a
; D register to an X register.
608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
609 ; CHECK-LABEL: test_bitcastv8i8toi64:
610 %res = bitcast <8 x i8> %in to i64
611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <4 x i16> vector to a scalar i64; the CHECK expects an fmov from a
; D register to an X register.
615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
616 ; CHECK-LABEL: test_bitcastv4i16toi64:
617 %res = bitcast <4 x i16> %in to i64
618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <2 x i32> vector to a scalar i64; the CHECK expects an fmov from a
; D register to an X register.
622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
623 ; CHECK-LABEL: test_bitcastv2i32toi64:
624 %res = bitcast <2 x i32> %in to i64
625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <2 x float> vector to a scalar i64; the CHECK expects an fmov from
; a D register to an X register.
629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
630 ; CHECK-LABEL: test_bitcastv2f32toi64:
631 %res = bitcast <2 x float> %in to i64
632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <1 x i64> vector to a scalar i64; the CHECK expects an fmov from a
; D register to an X register.
636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
637 ; CHECK-LABEL: test_bitcastv1i64toi64:
638 %res = bitcast <1 x i64> %in to i64
639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <1 x double> vector to a scalar i64; the CHECK expects an fmov
; from a D register to an X register.
643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
644 ; CHECK-LABEL: test_bitcastv1f64toi64:
645 %res = bitcast <1 x double> %in to i64
646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a scalar i64 to an <8 x i8> vector; the CHECK expects an fmov from
; an X register to a D register.
650 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
651 ; CHECK-LABEL: test_bitcasti64tov8i8:
652 %res = bitcast i64 %in to <8 x i8>
653 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <4 x i16> vector; the CHECK expects an fmov from
; an X register to a D register.
657 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
658 ; CHECK-LABEL: test_bitcasti64tov4i16:
659 %res = bitcast i64 %in to <4 x i16>
660 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <2 x i32> vector; the CHECK expects an fmov from
; an X register to a D register.
664 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
665 ; CHECK-LABEL: test_bitcasti64tov2i32:
666 %res = bitcast i64 %in to <2 x i32>
667 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <2 x float> vector; the CHECK expects an fmov from
; an X register to a D register.
671 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
672 ; CHECK-LABEL: test_bitcasti64tov2f32:
673 %res = bitcast i64 %in to <2 x float>
674 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <1 x i64> vector; the CHECK expects an fmov from
; an X register to a D register.
678 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
679 ; CHECK-LABEL: test_bitcasti64tov1i64:
680 %res = bitcast i64 %in to <1 x i64>
681 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <1 x double> vector and returns it; the CHECK
; expects an fmov from an X register to a D register.
685 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
686 ; CHECK-LABEL: test_bitcasti64tov1f64:
687 %res = bitcast i64 %in to <1 x double>
688 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
689 ret <1 x double> %res
; Negates an <8 x i8> vector (0 - %a), bitcasts the result to <1 x double> and
; converts it to <1 x i64> with fptosi. The CHECK/CHECK-NEXT pair requires the
; neg to be immediately followed by fcvtzs, i.e. no instruction for the bitcast.
692 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
693 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
694 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
695 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
696 %sub.i = sub <8 x i8> zeroinitializer, %a
697 %1 = bitcast <8 x i8> %sub.i to <1 x double>
698 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
699 ret <1 x i64> %vcvt.i
; Negates a <4 x i16> vector (0 - %a), bitcasts the result to <1 x double> and
; converts it to <1 x i64> with fptosi. The CHECK/CHECK-NEXT pair requires the
; neg to be immediately followed by fcvtzs.
702 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
703 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
704 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
705 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
706 %sub.i = sub <4 x i16> zeroinitializer, %a
707 %1 = bitcast <4 x i16> %sub.i to <1 x double>
708 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
709 ret <1 x i64> %vcvt.i
; Negates a <2 x i32> vector (0 - %a), bitcasts the result to <1 x double> and
; converts it to <1 x i64> with fptosi. The CHECK/CHECK-NEXT pair requires the
; neg to be immediately followed by fcvtzs.
712 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
713 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
714 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
715 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
716 %sub.i = sub <2 x i32> zeroinitializer, %a
717 %1 = bitcast <2 x i32> %sub.i to <1 x double>
718 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
719 ret <1 x i64> %vcvt.i
; Negates a <1 x i64> vector (0 - %a), bitcasts the result to <1 x double> and
; converts it back to <1 x i64> with fptosi. The CHECK expects a scalar neg on
; D registers immediately followed by fcvtzs.
722 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
723 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
724 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
725 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
726 %sub.i = sub <1 x i64> zeroinitializer, %a
727 %1 = bitcast <1 x i64> %sub.i to <1 x double>
728 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
729 ret <1 x i64> %vcvt.i
; Negates a <2 x float> vector (written as -0.0 - %a), bitcasts the result to
; <1 x double> and converts it to <1 x i64> with fptosi. The CHECK expects an
; fneg on .2s immediately followed by fcvtzs.
732 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
733 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
734 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
735 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
736 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
737 %1 = bitcast <2 x float> %sub.i to <1 x double>
738 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
739 ret <1 x i64> %vcvt.i
; Converts <1 x i64> to <1 x double> with sitofp, bitcasts that to <8 x i8>
; and negates it (0 - value). The CHECK/CHECK-NEXT pair requires scvtf to be
; immediately followed by the .8b neg.
742 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
743 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
744 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
745 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
746 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
747 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
748 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts <1 x i64> to <1 x double> with sitofp, bitcasts that to <4 x i16>
; and negates it (0 - value). The CHECK/CHECK-NEXT pair requires scvtf to be
; immediately followed by the .4h neg.
752 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
753 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
754 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
755 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
756 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
757 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
758 %sub.i = sub <4 x i16> zeroinitializer, %1
; Converts <1 x i64> to <1 x double> with sitofp, bitcasts that to <2 x i32>
; and negates it (0 - value). The CHECK/CHECK-NEXT pair requires scvtf to be
; immediately followed by the .2s neg.
762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
766 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
767 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
768 %sub.i = sub <2 x i32> zeroinitializer, %1
; Converts <1 x i64> to <1 x double> with sitofp, bitcasts that back to
; <1 x i64> and negates it (0 - value). The CHECK/CHECK-NEXT pair requires
; scvtf to be immediately followed by a scalar neg on D registers.
772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
776 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
777 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
778 %sub.i = sub <1 x i64> zeroinitializer, %1
; Converts <1 x i64> to <1 x double> with sitofp, bitcasts that to <2 x float>
; and negates it (written as -0.0 - value). The CHECK/CHECK-NEXT pair requires
; scvtf to be immediately followed by the .2s fneg.
782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
786 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
787 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
788 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
789 ret <2 x float> %sub.i
792 ; Test insert element into an undef vector
; Inserts the i8 scalar %a into lane 0 of an undef <8 x i8> vector; the CHECK
; expects an fmov from a W register to an S register.
793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
794 ; CHECK-LABEL: scalar_to_vector.v8i8:
795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
796 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts the i8 scalar %a into lane 0 of an undef <16 x i8> vector; the CHECK
; expects an fmov from a W register to an S register.
800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
801 ; CHECK-LABEL: scalar_to_vector.v16i8:
802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
803 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts the i16 scalar %a into lane 0 of an undef <4 x i16> vector; the CHECK
; expects an fmov from a W register to an S register.
807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
808 ; CHECK-LABEL: scalar_to_vector.v4i16:
809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
810 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts the i16 scalar %a into lane 0 of an undef <8 x i16> vector; the CHECK
; expects an fmov from a W register to an S register.
814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
815 ; CHECK-LABEL: scalar_to_vector.v8i16:
816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
817 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts the i32 scalar %a into lane 0 of an undef <2 x i32> vector; the CHECK
; expects an fmov from a W register to an S register.
821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
822 ; CHECK-LABEL: scalar_to_vector.v2i32:
823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
824 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts the i32 scalar %a into lane 0 of an undef <4 x i32> vector; the CHECK
; expects an fmov from a W register to an S register.
828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
829 ; CHECK-LABEL: scalar_to_vector.v4i32:
830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
831 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts the i64 scalar %a into lane 0 of an undef <2 x i64> vector; the CHECK
; expects an fmov from an X register to a D register.
835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
836 ; CHECK-LABEL: scalar_to_vector.v2i64:
837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
838 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the only lane of a <1 x i8> vector and inserts it into all eight
; lanes of an <8 x i8> vector; the CHECK pins the exact registers and expects
; "dup v0.8b, v0.b[0]".
842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
843 ; CHECK-LABEL: testDUP.v1i8:
844 ; CHECK: dup v0.8b, v0.b[0]
845 %b = extractelement <1 x i8> %a, i32 0
846 %c = insertelement <8 x i8> undef, i8 %b, i32 0
847 %d = insertelement <8 x i8> %c, i8 %b, i32 1
848 %e = insertelement <8 x i8> %d, i8 %b, i32 2
849 %f = insertelement <8 x i8> %e, i8 %b, i32 3
850 %g = insertelement <8 x i8> %f, i8 %b, i32 4
851 %h = insertelement <8 x i8> %g, i8 %b, i32 5
852 %i = insertelement <8 x i8> %h, i8 %b, i32 6
853 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; Extracts the only lane of a <1 x i16> vector and inserts it into all eight
; lanes of an <8 x i16> vector; the CHECK pins the exact registers and expects
; "dup v0.8h, v0.h[0]".
857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
858 ; CHECK-LABEL: testDUP.v1i16:
859 ; CHECK: dup v0.8h, v0.h[0]
860 %b = extractelement <1 x i16> %a, i32 0
861 %c = insertelement <8 x i16> undef, i16 %b, i32 0
862 %d = insertelement <8 x i16> %c, i16 %b, i32 1
863 %e = insertelement <8 x i16> %d, i16 %b, i32 2
864 %f = insertelement <8 x i16> %e, i16 %b, i32 3
865 %g = insertelement <8 x i16> %f, i16 %b, i32 4
866 %h = insertelement <8 x i16> %g, i16 %b, i32 5
867 %i = insertelement <8 x i16> %h, i16 %b, i32 6
868 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; Extracts the only lane of a <1 x i32> vector and inserts it into all four
; lanes of a <4 x i32> vector; the CHECK pins the exact registers and expects
; "dup v0.4s, v0.s[0]".
872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
873 ; CHECK-LABEL: testDUP.v1i32:
874 ; CHECK: dup v0.4s, v0.s[0]
875 %b = extractelement <1 x i32> %a, i32 0
876 %c = insertelement <4 x i32> undef, i32 %b, i32 0
877 %d = insertelement <4 x i32> %c, i32 %b, i32 1
878 %e = insertelement <4 x i32> %d, i32 %b, i32 2
879 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Rebuilds the low half of a <16 x i8> vector element by element: lanes 0..7 of
; %x are extracted and inserted into the same lane of an <8 x i8> result, which
; is returned. No CHECK lines for this function are visible in this excerpt.
883 define <8 x i8> @getl(<16 x i8> %x) #0 {
886 %vecext = extractelement <16 x i8> %x, i32 0
887 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
888 %vecext1 = extractelement <16 x i8> %x, i32 1
889 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
890 %vecext3 = extractelement <16 x i8> %x, i32 2
891 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
892 %vecext5 = extractelement <16 x i8> %x, i32 3
893 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
894 %vecext7 = extractelement <16 x i8> %x, i32 4
895 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
896 %vecext9 = extractelement <16 x i8> %x, i32 5
897 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
898 %vecext11 = extractelement <16 x i8> %x, i32 6
899 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
900 %vecext13 = extractelement <16 x i8> %x, i32 7
901 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
902 ret <8 x i8> %vecinit14
; Extracts lane 1 of a <2 x i32> vector, truncates it to i16 and splats it
; across a <4 x i16> result. The CHECK expects the extract+trunc+splat to fold
; into "dup v0.4h, v0.h[2]".
905 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
906 ; CHECK-LABEL: test_dup_v2i32_v4i16:
907 ; CHECK: dup v0.4h, v0.h[2]
909 %x = extractelement <2 x i32> %a, i32 1
910 %vget_lane = trunc i32 %x to i16
911 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
912 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
913 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
914 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
915 ret <4 x i16> %vecinit3.i
; Extracts lane 3 of a <4 x i32> vector, truncates it to i16 and splats it
; across an <8 x i16> result. The CHECK expects the sequence to fold into
; "dup v0.8h, v0.h[6]".
918 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
919 ; CHECK-LABEL: test_dup_v4i32_v8i16:
920 ; CHECK: dup v0.8h, v0.h[6]
922 %x = extractelement <4 x i32> %a, i32 3
923 %vget_lane = trunc i32 %x to i16
924 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
925 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
926 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
927 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
928 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
929 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
930 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
931 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
932 ret <8 x i16> %vecinit7.i
; test_dup_v1i64_v4i16: takes the single 64-bit lane of %a, truncates it to
; i16 and splats it across all four lanes of a <4 x i16>.
; Expected lowering: one `dup` from halfword lane 0 (the low 16 bits of the
; 64-bit element).
935 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
936 ; CHECK-LABEL: test_dup_v1i64_v4i16:
937 ; CHECK: dup v0.4h, v0.h[0]
939 %x = extractelement <1 x i64> %a, i32 0
940 %vget_lane = trunc i64 %x to i16
; Same scalar inserted into every destination lane -> splat.
941 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
942 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
943 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
944 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
945 ret <4 x i16> %vecinit3.i
; test_dup_v1i64_v2i32: takes the single 64-bit lane of %a, truncates it to
; i32 and splats it across both lanes of a <2 x i32>.
; Expected lowering: one `dup` from 32-bit lane 0 (the low word of the
; 64-bit element).
948 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
949 ; CHECK-LABEL: test_dup_v1i64_v2i32:
950 ; CHECK: dup v0.2s, v0.s[0]
952 %x = extractelement <1 x i64> %a, i32 0
953 %vget_lane = trunc i64 %x to i32
; Same scalar inserted into both destination lanes -> splat.
954 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
955 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
956 ret <2 x i32> %vecinit1.i
; test_dup_v2i64_v8i16: extracts 64-bit lane 1 of %a, truncates it to i16 and
; splats it across all eight lanes of an <8 x i16>.
; Expected lowering: one `dup` from halfword lane 4 (the low 16 bits of
; 64-bit lane 1 under little-endian lane numbering, 4 halfwords per lane).
959 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
960 ; CHECK-LABEL: test_dup_v2i64_v8i16:
961 ; CHECK: dup v0.8h, v0.h[4]
963 %x = extractelement <2 x i64> %a, i32 1
964 %vget_lane = trunc i64 %x to i16
; Same scalar inserted into every destination lane -> splat.
965 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
966 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
967 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
968 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
969 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
970 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
971 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
972 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
973 ret <8 x i16> %vecinit7.i
; test_dup_v2i64_v4i32: extracts 64-bit lane 1 of %a, truncates it to i32 and
; splats it across all four lanes of a <4 x i32>.
; Expected lowering: one `dup` from 32-bit lane 2 (the low word of 64-bit
; lane 1 under little-endian lane numbering).
976 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
977 ; CHECK-LABEL: test_dup_v2i64_v4i32:
978 ; CHECK: dup v0.4s, v0.s[2]
980 %x = extractelement <2 x i64> %a, i32 1
981 %vget_lane = trunc i64 %x to i32
; Same scalar inserted into every destination lane -> splat.
982 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
983 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
984 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
985 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
986 ret <4 x i32> %vecinit3.i
; test_dup_v4i32_v4i16: extracts 32-bit lane 1 of the 128-bit input %a,
; truncates it to i16 and splats it across the four lanes of a 64-bit
; <4 x i16> result (source vector is wider than the destination).
; Expected lowering: one `dup` from halfword lane 2.
989 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
990 ; CHECK-LABEL: test_dup_v4i32_v4i16:
991 ; CHECK: dup v0.4h, v0.h[2]
993 %x = extractelement <4 x i32> %a, i32 1
994 %vget_lane = trunc i32 %x to i16
; Same scalar inserted into every destination lane -> splat.
995 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
996 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
997 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
998 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
999 ret <4 x i16> %vecinit3.i
; test_dup_v2i64_v4i16: extracts 64-bit lane 0 of the 128-bit input %a,
; truncates it to i16 and splats it across the four lanes of a 64-bit
; <4 x i16> result.
; Expected lowering: one `dup` from halfword lane 0.
1002 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1003 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1004 ; CHECK: dup v0.4h, v0.h[0]
1006 %x = extractelement <2 x i64> %a, i32 0
1007 %vget_lane = trunc i64 %x to i16
; Same scalar inserted into every destination lane -> splat.
1008 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1009 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1010 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1011 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1012 ret <4 x i16> %vecinit3.i
; test_dup_v2i64_v2i32: extracts 64-bit lane 0 of the 128-bit input %a,
; truncates it to i32 and splats it across both lanes of a 64-bit <2 x i32>.
; Expected lowering: one `dup` from 32-bit lane 0.
1015 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1016 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1017 ; CHECK: dup v0.2s, v0.s[0]
1019 %x = extractelement <2 x i64> %a, i32 0
1020 %vget_lane = trunc i64 %x to i32
; Same scalar inserted into both destination lanes -> splat.
1021 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1022 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1023 ret <2 x i32> %vecinit1.i
; test_scalar_to_vector_f32_to_v2f32: reduces %a with the fmaxv intrinsic to
; a scalar float, round-trips that scalar through a <1 x float>, and places
; it in lane 0 of an otherwise-undef <2 x float>.
; The visible pattern expects a pairwise `fmaxp` writing an s-register from a
; .2s source.
1027 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1028 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1029 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1032 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
; Insert into / extract from a one-element vector: value is unchanged.
1033 %1 = insertelement <1 x float> undef, float %0, i32 0
1034 %2 = extractelement <1 x float> %1, i32 0
; Only lane 0 is defined; lane 1 of the result stays undef.
1035 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1036 ret <2 x float> %vecinit1.i
; test_scalar_to_vector_f32_to_v4f32: same shape as the v2f32 variant, but the
; scalar produced by fmaxv is placed in lane 0 of an otherwise-undef
; <4 x float> (128-bit result).
; The visible pattern expects a pairwise `fmaxp` writing an s-register from a
; .2s source.
1039 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1040 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1041 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1044 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
; Insert into / extract from a one-element vector: value is unchanged.
1045 %1 = insertelement <1 x float> undef, float %0, i32 0
1046 %2 = extractelement <1 x float> %1, i32 0
; Only lane 0 is defined; lanes 1..3 of the result stay undef.
1047 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1048 ret <4 x float> %vecinit1.i
; Intrinsic declaration: takes a <2 x float> and returns a scalar float.
; Called by the two test_scalar_to_vector_f32_* functions in this file.
1051 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; test_concat_undef_v1i32: builds a <2 x i32> whose lane 1 is lane 0 of %a and
; whose lane 0 is left undef.
; Expected lowering: a `dup` of 32-bit lane 0 into a .2s vector; broadcasting
; is acceptable because the other result lane is undef.
1053 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1054 ; CHECK-LABEL: test_concat_undef_v1i32:
1055 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1057 %0 = extractelement <2 x i32> %a, i32 0
; Note the destination index is 1, not 0: the defined element is the high lane.
1058 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1059 ret <2 x i32> %vecinit1.i
; Intrinsic declaration: scalar i32 -> i32. Called by test_concat_v1i32_undef
; and test_concat_diff_v1i32_v1i32. Attribute group #4 is defined outside
; this part of the file.
1062 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; test_concat_v1i32_undef: applies the sqabs intrinsic to the scalar %a and
; places the result in lane 0 of a <2 x i32>, leaving lane 1 undef.
; The visible pattern expects the scalar form of `sqabs` operating on
; s-registers.
1064 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1065 ; CHECK-LABEL: test_concat_v1i32_undef:
1066 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1069 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
; Only lane 0 is defined; lane 1 of the result stays undef.
1070 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1071 ret <2 x i32> %vecinit.i432
; test_concat_same_v1i32_v1i32: extracts lane 0 of %a and writes it to both
; lanes of a <2 x i32>, i.e. concatenates the same one-element value with
; itself.
; Expected lowering: one `dup` of 32-bit lane 0 into a .2s vector.
1074 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1075 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1076 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1078 %0 = extractelement <2 x i32> %a, i32 0
; Same scalar inserted into both destination lanes -> splat.
1079 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1080 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1081 ret <2 x i32> %vecinit1.i
; test_concat_diff_v1i32_v1i32: applies sqabs to two independent scalars,
; wraps each result in lane 0 of its own <2 x i32>, then combines lane 0 of
; both with a shufflevector (mask <0, 2>).
; Expected lowering: two scalar `sqabs` instructions followed by an `ins`
; that writes 32-bit lane 1 from a w-register.
1084 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1085 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1086 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1087 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1088 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
1090 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1091 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1092 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1093 %f = insertelement <2 x i32> undef, i32 %e, i32 0
; Mask index 0 selects %d[0]; index 2 selects %f[0] (second operand starts at 2).
1094 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; test_concat_v16i8_v16i8_v16i8: concatenates the low eight bytes of %x with
; the low eight bytes of %y using a single shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1098 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1099 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1100 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Mask 0..7 selects %x[0..7]; mask 16..23 selects %y[0..7].
1102 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1103 ret <16 x i8> %vecinit30
; test_concat_v16i8_v8i8_v16i8: the low half of the result is the whole 64-bit
; vector %x (copied lane by lane into a 16-lane vector); the high half is the
; low eight bytes of the 128-bit vector %y, selected with a shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1106 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1107 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1108 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Copy %x[0..7] into lanes 0..7 of a 16-lane vector; lanes 8..15 stay undef.
1110 %vecext = extractelement <8 x i8> %x, i32 0
1111 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1112 %vecext1 = extractelement <8 x i8> %x, i32 1
1113 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1114 %vecext3 = extractelement <8 x i8> %x, i32 2
1115 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1116 %vecext5 = extractelement <8 x i8> %x, i32 3
1117 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1118 %vecext7 = extractelement <8 x i8> %x, i32 4
1119 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1120 %vecext9 = extractelement <8 x i8> %x, i32 5
1121 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1122 %vecext11 = extractelement <8 x i8> %x, i32 6
1123 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1124 %vecext13 = extractelement <8 x i8> %x, i32 7
1125 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; Mask 0..7 keeps the widened %x lanes; mask 16..23 selects %y[0..7].
1126 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1127 ret <16 x i8> %vecinit30
; test_concat_v16i8_v16i8_v8i8: builds a <16 x i8> purely from scalar
; extract/insert pairs: lanes 0..7 come from the low eight bytes of the
; 128-bit vector %x, lanes 8..15 from the whole 64-bit vector %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1130 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1131 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1132 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Low half: result[i] = %x[i] for i = 0..7.
1134 %vecext = extractelement <16 x i8> %x, i32 0
1135 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1136 %vecext1 = extractelement <16 x i8> %x, i32 1
1137 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1138 %vecext3 = extractelement <16 x i8> %x, i32 2
1139 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1140 %vecext5 = extractelement <16 x i8> %x, i32 3
1141 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1142 %vecext7 = extractelement <16 x i8> %x, i32 4
1143 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1144 %vecext9 = extractelement <16 x i8> %x, i32 5
1145 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1146 %vecext11 = extractelement <16 x i8> %x, i32 6
1147 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1148 %vecext13 = extractelement <16 x i8> %x, i32 7
1149 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; High half: result[8 + i] = %y[i] for i = 0..7.
1150 %vecext15 = extractelement <8 x i8> %y, i32 0
1151 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1152 %vecext17 = extractelement <8 x i8> %y, i32 1
1153 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1154 %vecext19 = extractelement <8 x i8> %y, i32 2
1155 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1156 %vecext21 = extractelement <8 x i8> %y, i32 3
1157 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1158 %vecext23 = extractelement <8 x i8> %y, i32 4
1159 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1160 %vecext25 = extractelement <8 x i8> %y, i32 5
1161 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1162 %vecext27 = extractelement <8 x i8> %y, i32 6
1163 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1164 %vecext29 = extractelement <8 x i8> %y, i32 7
1165 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1166 ret <16 x i8> %vecinit30
; test_concat_v16i8_v8i8_v8i8: concatenates two 64-bit vectors into one
; 128-bit vector using only scalar extract/insert pairs: lanes 0..7 come from
; %x and lanes 8..15 from %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1169 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1170 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1171 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Low half: result[i] = %x[i] for i = 0..7.
1173 %vecext = extractelement <8 x i8> %x, i32 0
1174 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1175 %vecext1 = extractelement <8 x i8> %x, i32 1
1176 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1177 %vecext3 = extractelement <8 x i8> %x, i32 2
1178 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1179 %vecext5 = extractelement <8 x i8> %x, i32 3
1180 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1181 %vecext7 = extractelement <8 x i8> %x, i32 4
1182 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1183 %vecext9 = extractelement <8 x i8> %x, i32 5
1184 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1185 %vecext11 = extractelement <8 x i8> %x, i32 6
1186 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1187 %vecext13 = extractelement <8 x i8> %x, i32 7
1188 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; High half: result[8 + i] = %y[i] for i = 0..7.
1189 %vecext15 = extractelement <8 x i8> %y, i32 0
1190 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1191 %vecext17 = extractelement <8 x i8> %y, i32 1
1192 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1193 %vecext19 = extractelement <8 x i8> %y, i32 2
1194 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1195 %vecext21 = extractelement <8 x i8> %y, i32 3
1196 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1197 %vecext23 = extractelement <8 x i8> %y, i32 4
1198 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1199 %vecext25 = extractelement <8 x i8> %y, i32 5
1200 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1201 %vecext27 = extractelement <8 x i8> %y, i32 6
1202 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1203 %vecext29 = extractelement <8 x i8> %y, i32 7
1204 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1205 ret <16 x i8> %vecinit30
; test_concat_v8i16_v8i16_v8i16: concatenates the low four halfwords of %x
; with the low four halfwords of %y using a single shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1208 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1209 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1210 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Mask 0..3 selects %x[0..3]; mask 8..11 selects %y[0..3].
1212 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1213 ret <8 x i16> %vecinit14
; test_concat_v8i16_v4i16_v8i16: the low half of the result is the whole
; 64-bit vector %x (copied lane by lane into an 8-lane vector); the high half
; is the low four halfwords of the 128-bit vector %y, selected with a
; shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1216 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1217 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1218 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Copy %x[0..3] into lanes 0..3 of an 8-lane vector; lanes 4..7 stay undef.
1220 %vecext = extractelement <4 x i16> %x, i32 0
1221 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1222 %vecext1 = extractelement <4 x i16> %x, i32 1
1223 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1224 %vecext3 = extractelement <4 x i16> %x, i32 2
1225 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1226 %vecext5 = extractelement <4 x i16> %x, i32 3
1227 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; Mask 0..3 keeps the widened %x lanes; mask 8..11 selects %y[0..3].
1228 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1229 ret <8 x i16> %vecinit14
; test_concat_v8i16_v8i16_v4i16: builds an <8 x i16> from scalar
; extract/insert pairs: lanes 0..3 come from the low four halfwords of the
; 128-bit vector %x, lanes 4..7 from the whole 64-bit vector %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1232 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1233 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1234 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Low half: result[i] = %x[i] for i = 0..3.
1236 %vecext = extractelement <8 x i16> %x, i32 0
1237 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1238 %vecext1 = extractelement <8 x i16> %x, i32 1
1239 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1240 %vecext3 = extractelement <8 x i16> %x, i32 2
1241 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1242 %vecext5 = extractelement <8 x i16> %x, i32 3
1243 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; High half: result[4 + i] = %y[i] for i = 0..3.
1244 %vecext7 = extractelement <4 x i16> %y, i32 0
1245 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1246 %vecext9 = extractelement <4 x i16> %y, i32 1
1247 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1248 %vecext11 = extractelement <4 x i16> %y, i32 2
1249 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1250 %vecext13 = extractelement <4 x i16> %y, i32 3
1251 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1252 ret <8 x i16> %vecinit14
; test_concat_v8i16_v4i16_v4i16: concatenates two 64-bit vectors into one
; 128-bit vector using only scalar extract/insert pairs: lanes 0..3 come from
; %x and lanes 4..7 from %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1255 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1256 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1257 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Low half: result[i] = %x[i] for i = 0..3.
1259 %vecext = extractelement <4 x i16> %x, i32 0
1260 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1261 %vecext1 = extractelement <4 x i16> %x, i32 1
1262 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1263 %vecext3 = extractelement <4 x i16> %x, i32 2
1264 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1265 %vecext5 = extractelement <4 x i16> %x, i32 3
1266 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; High half: result[4 + i] = %y[i] for i = 0..3.
1267 %vecext7 = extractelement <4 x i16> %y, i32 0
1268 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1269 %vecext9 = extractelement <4 x i16> %y, i32 1
1270 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1271 %vecext11 = extractelement <4 x i16> %y, i32 2
1272 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1273 %vecext13 = extractelement <4 x i16> %y, i32 3
1274 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1275 ret <8 x i16> %vecinit14
; test_concat_v4i32_v4i32_v4i32: concatenates the low two words of %x with the
; low two words of %y using a single shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1278 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1279 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1280 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Mask 0,1 selects %x[0..1]; mask 4,5 selects %y[0..1].
1282 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1283 ret <4 x i32> %vecinit6
; test_concat_v4i32_v2i32_v4i32: the low half of the result is the whole
; 64-bit vector %x (copied lane by lane into a 4-lane vector); the high half
; is the low two words of the 128-bit vector %y, selected with a
; shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1286 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1287 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1288 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Copy %x[0..1] into lanes 0..1 of a 4-lane vector; lanes 2..3 stay undef.
1290 %vecext = extractelement <2 x i32> %x, i32 0
1291 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1292 %vecext1 = extractelement <2 x i32> %x, i32 1
1293 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
; Mask 0,1 keeps the widened %x lanes; mask 4,5 selects %y[0..1].
1294 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1295 ret <4 x i32> %vecinit6
; test_concat_v4i32_v4i32_v2i32: builds a <4 x i32> from scalar extract/insert
; pairs: lanes 0..1 come from the low two words of the 128-bit vector %x,
; lanes 2..3 from the whole 64-bit vector %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1298 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1299 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1300 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Low half: result[i] = %x[i] for i = 0..1.
1302 %vecext = extractelement <4 x i32> %x, i32 0
1303 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1304 %vecext1 = extractelement <4 x i32> %x, i32 1
1305 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
; High half: result[2 + i] = %y[i] for i = 0..1.
1306 %vecext3 = extractelement <2 x i32> %y, i32 0
1307 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1308 %vecext5 = extractelement <2 x i32> %y, i32 1
1309 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1310 ret <4 x i32> %vecinit6
; test_concat_v4i32_v2i32_v2i32: concatenates two 64-bit <2 x i32> vectors
; into one <4 x i32> with a single widening shufflevector.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1313 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1314 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1315 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Identity mask over both operands: 0,1 -> %x[0..1]; 2,3 -> %y[0..1].
1317 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1318 ret <4 x i32> %vecinit6
; test_concat_v2i64_v2i64_v2i64: forms a <2 x i64> from lane 0 of %x and
; lane 0 of %y using a single shufflevector.
; Expected lowering: a `zip1` on .2d operands (this case expects zip1 where
; the narrower-element concat tests expect `ins`).
1321 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1322 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1323 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
; Mask 0 selects %x[0]; mask 2 selects %y[0].
1325 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1326 ret <2 x i64> %vecinit2
; test_concat_v2i64_v1i64_v2i64: lane 0 of the result is the single element of
; the 64-bit vector %x; lane 1 is lane 0 of the 128-bit vector %y, selected
; with a shufflevector.
; Expected lowering: a `zip1` on .2d operands.
1329 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1330 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1331 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
; Widen %x to two lanes; lane 1 stays undef until the shuffle below.
1333 %vecext = extractelement <1 x i64> %x, i32 0
1334 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
; Mask 0 keeps the widened %x lane; mask 2 selects %y[0].
1335 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1336 ret <2 x i64> %vecinit2
; test_concat_v2i64_v2i64_v1i64: builds a <2 x i64> from scalar extract/insert
; pairs: lane 0 is lane 0 of the 128-bit vector %x, lane 1 is the single
; element of the 64-bit vector %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1339 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1340 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1341 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1343 %vecext = extractelement <2 x i64> %x, i32 0
1344 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1345 %vecext1 = extractelement <1 x i64> %y, i32 0
1346 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1347 ret <2 x i64> %vecinit2
; test_concat_v2i64_v1i64_v1i64: concatenates two one-element 64-bit vectors
; into a <2 x i64> using scalar extract/insert pairs: lane 0 from %x, lane 1
; from %y.
; Expected lowering: one `ins` copying d[0] of one register into d[1] of
; another.
1350 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1351 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1352 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1354 %vecext = extractelement <1 x i64> %x, i32 0
1355 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1356 %vecext1 = extractelement <1 x i64> %y, i32 0
1357 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1358 ret <2 x i64> %vecinit2
; concat_vector_v4i16_const: splats element 0 of a constant-zero <1 x i16>
; into four lanes via an all-zero shuffle mask, giving an all-zero <4 x i16>.
; Expected lowering: a `movi` of #0 into a d-register.
1362 define <4 x i16> @concat_vector_v4i16_const() {
1363 ; CHECK-LABEL: concat_vector_v4i16_const:
1364 ; CHECK: movi {{d[0-9]+}}, #0
; Mask zeroinitializer = every result lane reads source lane 0.
1365 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; concat_vector_v4i16_const_one: splats the constant i16 1 from a one-element
; vector into four lanes via an all-zero shuffle mask.
; Expected lowering: a `movi` of #0x1 into a .4h vector.
1369 define <4 x i16> @concat_vector_v4i16_const_one() {
1370 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1371 ; CHECK: movi {{v[0-9]+}}.4h, #0x1
; Mask zeroinitializer = every result lane reads source lane 0.
1372 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; concat_vector_v4i32_const: splats element 0 of a constant-zero <1 x i32>
; into four lanes via an all-zero shuffle mask, giving an all-zero <4 x i32>.
; Expected lowering: a `movi` of #0 into a .2d vector.
1376 define <4 x i32> @concat_vector_v4i32_const() {
1377 ; CHECK-LABEL: concat_vector_v4i32_const:
1378 ; CHECK: movi {{v[0-9]+}}.2d, #0
; Mask zeroinitializer = every result lane reads source lane 0.
1379 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; concat_vector_v8i8_const: splats element 0 of a constant-zero <1 x i8> into
; eight lanes via an all-zero shuffle mask, giving an all-zero <8 x i8>.
; Expected lowering: a `movi` of #0 into a d-register.
1383 define <8 x i8> @concat_vector_v8i8_const() {
1384 ; CHECK-LABEL: concat_vector_v8i8_const:
1385 ; CHECK: movi {{d[0-9]+}}, #0
; Mask zeroinitializer = every result lane reads source lane 0.
1386 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; concat_vector_v8i16_const: splats element 0 of a constant-zero <1 x i16>
; into eight lanes via an all-zero shuffle mask, giving an all-zero <8 x i16>.
; Expected lowering: a `movi` of #0 into a .2d vector.
1390 define <8 x i16> @concat_vector_v8i16_const() {
1391 ; CHECK-LABEL: concat_vector_v8i16_const:
1392 ; CHECK: movi {{v[0-9]+}}.2d, #0
; Mask zeroinitializer = every result lane reads source lane 0.
1393 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; concat_vector_v8i16_const_one: splats the constant i16 1 from a one-element
; vector into eight lanes via an all-zero shuffle mask.
; Expected lowering: a `movi` of #0x1 into a .8h vector.
1397 define <8 x i16> @concat_vector_v8i16_const_one() {
1398 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1399 ; CHECK: movi {{v[0-9]+}}.8h, #0x1
; Mask zeroinitializer = every result lane reads source lane 0.
1400 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; concat_vector_v16i8_const: splats element 0 of a constant-zero <1 x i8> into
; sixteen lanes via an all-zero shuffle mask, giving an all-zero <16 x i8>.
; Expected lowering: a `movi` of #0 into a .2d vector.
1404 define <16 x i8> @concat_vector_v16i8_const() {
1405 ; CHECK-LABEL: concat_vector_v16i8_const:
1406 ; CHECK: movi {{v[0-9]+}}.2d, #0
; Mask zeroinitializer = every result lane reads source lane 0.
1407 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; concat_vector_v4i16: splats the single element of the <1 x i16> argument %a
; into four lanes via an all-zero shuffle mask.
; Expected lowering: one `dup` of halfword lane 0 into a .4h vector.
1411 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1412 ; CHECK-LABEL: concat_vector_v4i16:
1413 ; CHECK: dup v0.4h, v0.h[0]
; Mask zeroinitializer = every result lane reads %a[0].
1414 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; concat_vector_v4i32: splats the single element of the <1 x i32> argument %a
; into four lanes via an all-zero shuffle mask.
; Expected lowering: one `dup` of 32-bit lane 0 into a .4s vector.
1418 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1419 ; CHECK-LABEL: concat_vector_v4i32:
1420 ; CHECK: dup v0.4s, v0.s[0]
; Mask zeroinitializer = every result lane reads %a[0].
1421 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; concat_vector_v8i8: splats the single element of the <1 x i8> argument %a
; into eight lanes via an all-zero shuffle mask.
; Expected lowering: one `dup` of byte lane 0 into a .8b vector.
1425 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1426 ; CHECK-LABEL: concat_vector_v8i8:
1427 ; CHECK: dup v0.8b, v0.b[0]
; Mask zeroinitializer = every result lane reads %a[0].
1428 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; concat_vector_v8i16: splats the single element of the <1 x i16> argument %a
; into eight lanes via an all-zero shuffle mask.
; Expected lowering: one `dup` of halfword lane 0 into a .8h vector.
1432 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1433 ; CHECK-LABEL: concat_vector_v8i16:
1434 ; CHECK: dup v0.8h, v0.h[0]
; Mask zeroinitializer = every result lane reads %a[0].
1435 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
; concat_vector_v16i8: splats the single element of the <1 x i8> argument %a
; into sixteen lanes via an all-zero shuffle mask.
; Expected lowering: one `dup` of byte lane 0 into a .16b vector.
1439 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1440 ; CHECK-LABEL: concat_vector_v16i8:
1441 ; CHECK: dup v0.16b, v0.b[0]
; Mask zeroinitializer = every result lane reads %a[0].
1442 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer