1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Inserts the scalar i8 argument into lane 15 of a <16 x i8> vector.
; Expected codegen: an `ins` from a W register into byte lane 15.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the scalar i16 argument into lane 6 of an <8 x i16> vector.
; Expected codegen: an `ins` from a W register into halfword lane 6.
11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12 ; CHECK-LABEL: ins8hw:
13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the scalar i32 argument into lane 2 of a <4 x i32> vector.
; Expected codegen: an `ins` from a W register into word lane 2.
18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19 ; CHECK-LABEL: ins4sw:
20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the scalar i64 argument into lane 1 of a <2 x i64> vector.
; Expected codegen: an `ins` from an X register into doubleword lane 1.
25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26 ; CHECK-LABEL: ins2dw:
27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Inserts the scalar i8 argument into lane 5 of an <8 x i8> vector.
; Expected codegen: an `ins` from a W register into byte lane 5.
32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33 ; CHECK-LABEL: ins8bw:
34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Inserts the scalar i16 argument into lane 3 of a <4 x i16> vector.
; Expected codegen: an `ins` from a W register into halfword lane 3.
39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40 ; CHECK-LABEL: ins4hw:
41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Inserts the scalar i32 argument into lane 1 of a <2 x i32> vector.
; Expected codegen: an `ins` from a W register into word lane 1.
46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47 ; CHECK-LABEL: ins2sw:
48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of one <16 x i8> vector into lane 15 of another <16 x i8>.
; Expected codegen: a lane-to-lane `ins` from .b[2] to .b[15].
53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54 ; CHECK-LABEL: ins16b16:
55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
57 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of one <8 x i16> vector into lane 7 of another <8 x i16>.
; Expected codegen: a lane-to-lane `ins` from .h[2] to .h[7].
61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62 ; CHECK-LABEL: ins8h8:
63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
65 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of one <4 x i32> vector into lane 1 of another <4 x i32>.
; Expected codegen: a lane-to-lane `ins` from .s[2] to .s[1].
69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70 ; CHECK-LABEL: ins4s4:
71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
73 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of one <2 x i64> vector into lane 1 of another <2 x i64>.
; Expected codegen: a lane-to-lane `ins` from .d[0] to .d[1].
77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78 ; CHECK-LABEL: ins2d2:
79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
81 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 2 of one <4 x float> vector into lane 1 of another <4 x float>.
; Expected codegen: a lane-to-lane `ins` from .s[2] to .s[1].
85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86 ; CHECK-LABEL: ins4f4:
87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88 %tmp3 = extractelement <4 x float> %tmp1, i32 2
89 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
; Copies lane 0 of one <2 x double> vector into lane 1 of another and returns
; the updated vector. Expected codegen: a lane-to-lane `ins` from .d[0] to .d[1].
93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94 ; CHECK-LABEL: ins2df2:
95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96 %tmp3 = extractelement <2 x double> %tmp1, i32 0
97 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
98 ret <2 x double> %tmp4
; Copies lane 2 of an <8 x i8> vector into lane 15 of a <16 x i8> vector.
; Expected codegen: a lane-to-lane `ins` from .b[2] to .b[15].
101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102 ; CHECK-LABEL: ins8b16:
103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
105 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of a <4 x i16> vector into lane 7 of an <8 x i16> vector.
; Expected codegen: a lane-to-lane `ins` from .h[2] to .h[7].
109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110 ; CHECK-LABEL: ins4h8:
111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
113 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of a <2 x i32> vector into lane 1 of a <4 x i32> vector.
; Expected codegen: a lane-to-lane `ins` from .s[1] to .s[1].
117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118 ; CHECK-LABEL: ins2s4:
119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
121 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the single lane of a <1 x i64> vector into lane 1 of a <2 x i64> vector.
; Expected codegen: a lane-to-lane `ins` from .d[0] to .d[1].
125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126 ; CHECK-LABEL: ins1d2:
127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
129 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 1 of a <2 x float> vector into lane 1 of a <4 x float> vector and
; returns the result. Expected codegen: a lane-to-lane `ins` from .s[1] to .s[1].
133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134 ; CHECK-LABEL: ins2f4:
135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136 %tmp3 = extractelement <2 x float> %tmp1, i32 1
137 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
138 ret <4 x float> %tmp4
; Copies the single lane of a <1 x double> vector into lane 1 of a <2 x double>
; vector and returns the result. Expected codegen: `ins` from .d[0] to .d[1].
141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142 ; CHECK-LABEL: ins1f2:
143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144 %tmp3 = extractelement <1 x double> %tmp1, i32 0
145 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
146 ret <2 x double> %tmp4
; Copies lane 2 of a <16 x i8> vector into lane 7 of an <8 x i8> vector.
; Expected codegen: a lane-to-lane `ins` from .b[2] to .b[7].
149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150 ; CHECK-LABEL: ins16b8:
151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
153 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of an <8 x i16> vector into lane 3 of a <4 x i16> vector.
; Expected codegen: a lane-to-lane `ins` from .h[2] to .h[3].
157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158 ; CHECK-LABEL: ins8h4:
159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
161 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of a <4 x i32> vector into lane 1 of a <2 x i32> vector.
; Expected codegen: a lane-to-lane `ins` from .s[2] to .s[1].
165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166 ; CHECK-LABEL: ins4s2:
167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
169 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of a <2 x i64> vector into the single lane of a <1 x i64> vector.
; Expected codegen: a lane-to-lane `ins` from .d[0] to .d[0].
173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174 ; CHECK-LABEL: ins2d1:
175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
177 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 2 of a <4 x float> vector into lane 1 of a <2 x float> vector and
; returns the result. Expected codegen: a lane-to-lane `ins` from .s[2] to .s[1].
181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182 ; CHECK-LABEL: ins4f2:
183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184 %tmp3 = extractelement <4 x float> %tmp1, i32 2
185 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
186 ret <2 x float> %tmp4
; Copies lane 1 of a <2 x double> vector into the single lane of a <1 x double>
; vector and returns the result. Expected codegen: `ins` from .d[1] to .d[0].
189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190 ; CHECK-LABEL: ins2f1:
191 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
192 %tmp3 = extractelement <2 x double> %tmp1, i32 1
193 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
194 ret <1 x double> %tmp4
; Copies lane 2 of one <8 x i8> vector into lane 4 of another <8 x i8>.
; Expected codegen: a lane-to-lane `ins` from .b[2] to .b[4].
197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198 ; CHECK-LABEL: ins8b8:
199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
201 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of one <4 x i16> vector into lane 3 of another <4 x i16>.
; Expected codegen: a lane-to-lane `ins` from .h[2] to .h[3].
205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206 ; CHECK-LABEL: ins4h4:
207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
209 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of one <2 x i32> vector into lane 1 of another <2 x i32>.
; Expected codegen: a lane-to-lane `ins` from .s[0] to .s[1].
213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214 ; CHECK-LABEL: ins2s2:
215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
217 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the single lane of one <1 x i64> vector into the single lane of another.
; Expected codegen: a lane-to-lane `ins` from .d[0] to .d[0].
221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222 ; CHECK-LABEL: ins1d1:
223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
225 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 0 of one <2 x float> vector into lane 1 of another and returns the
; result. Expected codegen: a lane-to-lane `ins` from .s[0] to .s[1].
229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230 ; CHECK-LABEL: ins2f2:
231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232 %tmp3 = extractelement <2 x float> %tmp1, i32 0
233 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
234 ret <2 x float> %tmp4
; Replaces the only lane of a <1 x double> vector with the only lane of another
; and returns it. This is a negative test: no vector `ins` may be emitted.
237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238 ; CHECK-LABEL: ins1df1:
239 ; CHECK-NOT: ins {{v[0-9]+}}
240 %tmp3 = extractelement <1 x double> %tmp1, i32 0
241 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
242 ret <1 x double> %tmp4
; Extracts lane 8 of a <16 x i8> vector and zero-extends it to i32.
; Expected codegen: a single `umov` from .b[8] into a W register.
245 define i32 @umovw16b(<16 x i8> %tmp1) {
246 ; CHECK-LABEL: umovw16b:
247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
249 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of an <8 x i16> vector and zero-extends it to i32.
; Expected codegen: a single `umov` from .h[2] into a W register.
253 define i32 @umovw8h(<8 x i16> %tmp1) {
254 ; CHECK-LABEL: umovw8h:
255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
257 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32> vector as a scalar i32.
; Expected codegen: a `mov` from .s[2] into a W register.
261 define i32 @umovw4s(<4 x i32> %tmp1) {
262 ; CHECK-LABEL: umovw4s:
263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of a <2 x i64> vector as a scalar i64.
; Expected codegen: a `mov` from .d[1] into an X register.
268 define i64 @umovx2d(<2 x i64> %tmp1) {
269 ; CHECK-LABEL: umovx2d:
270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Extracts lane 7 of an <8 x i8> vector and zero-extends it to i32.
; Expected codegen: a single `umov` from .b[7] into a W register. The full
; mnemonic is spelled out because a bare `mov` pattern is matched as a
; substring and would also accept a sign-extending `smov`.
275 define i32 @umovw8b(<8 x i8> %tmp1) {
276 ; CHECK-LABEL: umovw8b:
277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> vector and zero-extends it to i32.
; Expected codegen: a single `umov` from .h[2] into a W register. The full
; mnemonic is spelled out because a bare `mov` pattern is matched as a
; substring and would also accept a sign-extending `smov`.
283 define i32 @umovw4h(<4 x i16> %tmp1) {
284 ; CHECK-LABEL: umovw4h:
285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32> vector as a scalar i32.
; Expected codegen: a `mov` from .s[1] into a W register.
291 define i32 @umovw2s(<2 x i32> %tmp1) {
292 ; CHECK-LABEL: umovw2s:
293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the single lane of a <1 x i64> vector as a scalar i64.
; Expected codegen: an `fmov` from a D register into an X register.
298 define i64 @umovx1d(<1 x i64> %tmp1) {
299 ; CHECK-LABEL: umovx1d:
300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of a <16 x i8> vector, sign-extends it to i32 and adds the
; extended value to itself. Expected codegen: `smov` from .b[8] into a W register.
305 define i32 @smovw16b(<16 x i8> %tmp1) {
306 ; CHECK-LABEL: smovw16b:
307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
309 %tmp4 = sext i8 %tmp3 to i32
310 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16> vector, sign-extends it to i32 and adds the
; extended value to itself. Expected codegen: `smov` from .h[2] into a W register.
314 define i32 @smovw8h(<8 x i16> %tmp1) {
315 ; CHECK-LABEL: smovw8h:
316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
318 %tmp4 = sext i16 %tmp3 to i32
319 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of a <16 x i8> vector, sign-extends it to i32 and adds the
; extended value to itself. The check accepts `smov` from .b[8] into either an
; X or a W register.
323 define i32 @smovx16b(<16 x i8> %tmp1) {
324 ; CHECK-LABEL: smovx16b:
325 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
326 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
327 %tmp4 = sext i8 %tmp3 to i32
328 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16> vector and sign-extends it to i32.
; The check accepts `smov` from .h[2] into either an X or a W register.
332 define i32 @smovx8h(<8 x i16> %tmp1) {
333 ; CHECK-LABEL: smovx8h:
334 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
335 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
336 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32> vector and sign-extends it to i64.
; Expected codegen: `smov` from .s[2] into an X register.
340 define i64 @smovx4s(<4 x i32> %tmp1) {
341 ; CHECK-LABEL: smovx4s:
342 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
343 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
344 %tmp4 = sext i32 %tmp3 to i64
; Extracts lane 4 of an <8 x i8> vector, sign-extends it to i32 and adds the
; extended value to itself. Expected codegen: `smov` from .b[4] into a W register.
348 define i32 @smovw8b(<8 x i8> %tmp1) {
349 ; CHECK-LABEL: smovw8b:
350 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
351 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
352 %tmp4 = sext i8 %tmp3 to i32
353 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of a <4 x i16> vector, sign-extends it to i32 and adds the
; extended value to itself. Expected codegen: `smov` from .h[2] into a W register.
357 define i32 @smovw4h(<4 x i16> %tmp1) {
358 ; CHECK-LABEL: smovw4h:
359 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
360 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
361 %tmp4 = sext i16 %tmp3 to i32
362 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 6 of an <8 x i8> vector and sign-extends it to i32.
; The check accepts `smov` from .b[6] into either an X or a W register.
366 define i32 @smovx8b(<8 x i8> %tmp1) {
367 ; CHECK-LABEL: smovx8b:
368 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
369 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
370 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> vector and sign-extends it to i32.
; The check accepts `smov` from .h[2] into either an X or a W register.
374 define i32 @smovx4h(<4 x i16> %tmp1) {
375 ; CHECK-LABEL: smovx4h:
376 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
377 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
378 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32> vector and sign-extends it to i64.
; Expected codegen: `smov` from .s[1] into an X register.
382 define i64 @smovx2s(<2 x i32> %tmp1) {
383 ; CHECK-LABEL: smovx2s:
384 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
385 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
386 %tmp4 = sext i32 %tmp3 to i64
; Shuffle that keeps %v1 except for result lane 5, which takes mask index 11
; (lane 3 of %v2). Expected codegen: one `ins` from .b[3] to .b[5].
390 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
391 ; CHECK-LABEL: test_vcopy_lane_s8:
392 ; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
393 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
394 ret <8 x i8> %vset_lane
; Shuffle that keeps %v1 except for result lane 14, which takes mask index 22
; (lane 6 of %v2). Expected codegen: one `ins` from .b[6] to .b[14].
397 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
398 ; CHECK-LABEL: test_vcopyq_laneq_s8:
399 ; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
400 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
401 ret <16 x i8> %vset_lane
; Same idea with the operand roles swapped: the result is %v2 (mask indices
; 8-14) except for lane 7, which takes lane 0 of %v1.
; Expected codegen: one `ins` from .b[0] to .b[7].
404 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
405 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
406 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
407 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
408 ret <8 x i8> %vset_lane
; Swapped-operand variant on 128-bit vectors: the result is %v2 (mask indices
; 17-31) except for lane 0, which takes lane 15 of %v1.
; Expected codegen: one `ins` from .b[15] to .b[0].
411 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
412 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
413 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
414 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
415 ret <16 x i8> %vset_lane
; Builds an <8 x i8> by inserting the same scalar i8 into every lane.
; Expected codegen: the insert chain folds into one `dup` .8b from a W register.
418 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
419 ; CHECK-LABEL: test_vdup_n_u8:
420 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
421 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
422 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
423 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
424 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
425 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
426 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
427 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
428 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
429 ret <8 x i8> %vecinit7.i
; Builds a <4 x i16> by inserting the same scalar i16 into every lane.
; Expected codegen: one `dup` .4h from a W register.
432 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
433 ; CHECK-LABEL: test_vdup_n_u16:
434 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
435 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
436 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
437 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
438 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
439 ret <4 x i16> %vecinit3.i
; Builds a <2 x i32> by inserting the same scalar i32 into both lanes.
; Expected codegen: one `dup` .2s from a W register.
442 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
443 ; CHECK-LABEL: test_vdup_n_u32:
444 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
445 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
446 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
447 ret <2 x i32> %vecinit1.i
; Builds a <1 x i64> from a scalar i64. With only one lane, the expected
; codegen is an `fmov` from an X register to a D register rather than a `dup`.
450 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
451 ; CHECK-LABEL: test_vdup_n_u64:
452 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
453 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
454 ret <1 x i64> %vecinit.i
; Builds a <16 x i8> by inserting the same scalar i8 into all 16 lanes.
; Expected codegen: one `dup` .16b from a W register.
457 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
458 ; CHECK-LABEL: test_vdupq_n_u8:
459 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
460 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
461 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
462 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
463 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
464 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
465 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
466 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
467 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
468 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
469 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
470 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
471 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
472 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
473 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
474 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
475 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
476 ret <16 x i8> %vecinit15.i
; Builds an <8 x i16> by inserting the same scalar i16 into all 8 lanes.
; Expected codegen: one `dup` .8h from a W register.
479 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
480 ; CHECK-LABEL: test_vdupq_n_u16:
481 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
482 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
483 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
484 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
485 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
486 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
487 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
488 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
489 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
490 ret <8 x i16> %vecinit7.i
; Builds a <4 x i32> by inserting the same scalar i32 into all 4 lanes.
; Expected codegen: one `dup` .4s from a W register.
493 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
494 ; CHECK-LABEL: test_vdupq_n_u32:
495 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
496 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
497 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
498 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
499 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
500 ret <4 x i32> %vecinit3.i
; Builds a <2 x i64> by inserting the same scalar i64 into both lanes.
; Expected codegen: one `dup` .2d from an X register.
503 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
504 ; CHECK-LABEL: test_vdupq_n_u64:
505 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
506 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
507 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
508 ret <2 x i64> %vecinit1.i
; Splats lane 5 of an <8 x i8> vector across an <8 x i8> result.
; Expected codegen: `dup` .8b from .b[5].
511 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
512 ; CHECK-LABEL: test_vdup_lane_s8:
513 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
514 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
515 ret <8 x i8> %shuffle
; Splats lane 2 of a <4 x i16> vector across a <4 x i16> result.
; Expected codegen: `dup` .4h from .h[2].
518 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
519 ; CHECK-LABEL: test_vdup_lane_s16:
520 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
521 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
522 ret <4 x i16> %shuffle
; Splats lane 1 of a <2 x i32> vector across a <2 x i32> result.
; Expected codegen: `dup` .2s from .s[1].
525 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
526 ; CHECK-LABEL: test_vdup_lane_s32:
527 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
528 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
529 ret <2 x i32> %shuffle
; Splats lane 5 of an <8 x i8> vector across a <16 x i8> result.
; Expected codegen: `dup` .16b from .b[5]. The mnemonic is part of the pattern
; so that the operands alone cannot satisfy the check.
532 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
533 ; CHECK-LABEL: test_vdupq_lane_s8:
534 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
535 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
536 ret <16 x i8> %shuffle
; Splats lane 2 of a <4 x i16> vector across an <8 x i16> result.
; Expected codegen: `dup` .8h from .h[2]. The mnemonic is part of the pattern
; so that the operands alone cannot satisfy the check.
539 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
540 ; CHECK-LABEL: test_vdupq_lane_s16:
541 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
542 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
543 ret <8 x i16> %shuffle
; Splats lane 1 of a <2 x i32> vector across a <4 x i32> result.
; Expected codegen: `dup` .4s from .s[1]. The mnemonic is part of the pattern
; so that the operands alone cannot satisfy the check.
546 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
547 ; CHECK-LABEL: test_vdupq_lane_s32:
548 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
549 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
550 ret <4 x i32> %shuffle
; Splats the single lane of a <1 x i64> vector across a <2 x i64> result.
; Expected codegen: `dup` .2d from .d[0]. The mnemonic is part of the pattern
; so that the operands alone cannot satisfy the check.
553 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
554 ; CHECK-LABEL: test_vdupq_lane_s64:
555 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
556 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
557 ret <2 x i64> %shuffle
; Splats lane 5 of a <16 x i8> vector across an <8 x i8> result.
; Expected codegen: `dup` .8b from .b[5].
560 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
561 ; CHECK-LABEL: test_vdup_laneq_s8:
562 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
563 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
564 ret <8 x i8> %shuffle
; Splats lane 2 of an <8 x i16> vector across a <4 x i16> result.
; Expected codegen: `dup` .4h from .h[2].
567 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
568 ; CHECK-LABEL: test_vdup_laneq_s16:
569 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
570 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
571 ret <4 x i16> %shuffle
; Splats lane 1 of a <4 x i32> vector across a <2 x i32> result.
; Expected codegen: `dup` .2s from .s[1].
574 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
575 ; CHECK-LABEL: test_vdup_laneq_s32:
576 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
577 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
578 ret <2 x i32> %shuffle
; Splats lane 5 of a <16 x i8> vector across a <16 x i8> result.
; Expected codegen: `dup` .16b from .b[5].
581 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
582 ; CHECK-LABEL: test_vdupq_laneq_s8:
583 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
584 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
585 ret <16 x i8> %shuffle
; Splats lane 2 of an <8 x i16> vector across an <8 x i16> result.
; Expected codegen: `dup` .8h from .h[2]. The mnemonic is part of the pattern
; so that the operands alone cannot satisfy the check.
588 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
589 ; CHECK-LABEL: test_vdupq_laneq_s16:
590 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
591 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
592 ret <8 x i16> %shuffle
; Splats lane 1 of a <4 x i32> vector across a <4 x i32> result.
; Expected codegen: `dup` .4s from .s[1].
595 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
596 ; CHECK-LABEL: test_vdupq_laneq_s32:
597 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
598 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
599 ret <4 x i32> %shuffle
; Splats lane 0 of a <2 x i64> vector across a <2 x i64> result.
; Expected codegen: `dup` .2d from .d[0].
602 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
603 ; CHECK-LABEL: test_vdupq_laneq_s64:
604 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
605 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
606 ret <2 x i64> %shuffle
; Bitcasts an <8 x i8> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
609 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
610 ; CHECK-LABEL: test_bitcastv8i8toi64:
611 %res = bitcast <8 x i8> %in to i64
612 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <4 x i16> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
616 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
617 ; CHECK-LABEL: test_bitcastv4i16toi64:
618 %res = bitcast <4 x i16> %in to i64
619 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <2 x i32> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
623 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
624 ; CHECK-LABEL: test_bitcastv2i32toi64:
625 %res = bitcast <2 x i32> %in to i64
626 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <2 x float> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
630 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
631 ; CHECK-LABEL: test_bitcastv2f32toi64:
632 %res = bitcast <2 x float> %in to i64
633 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <1 x i64> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
637 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
638 ; CHECK-LABEL: test_bitcastv1i64toi64:
639 %res = bitcast <1 x i64> %in to i64
640 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a <1 x double> vector to a scalar i64.
; Expected codegen: `fmov` from a D register to an X register.
644 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
645 ; CHECK-LABEL: test_bitcastv1f64toi64:
646 %res = bitcast <1 x double> %in to i64
647 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts a scalar i64 to an <8 x i8> vector.
; Expected codegen: `fmov` from an X register to a D register.
651 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
652 ; CHECK-LABEL: test_bitcasti64tov8i8:
653 %res = bitcast i64 %in to <8 x i8>
654 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <4 x i16> vector.
; Expected codegen: `fmov` from an X register to a D register.
658 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
659 ; CHECK-LABEL: test_bitcasti64tov4i16:
660 %res = bitcast i64 %in to <4 x i16>
661 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <2 x i32> vector.
; Expected codegen: `fmov` from an X register to a D register.
665 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
666 ; CHECK-LABEL: test_bitcasti64tov2i32:
667 %res = bitcast i64 %in to <2 x i32>
668 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <2 x float> vector.
; Expected codegen: `fmov` from an X register to a D register.
672 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
673 ; CHECK-LABEL: test_bitcasti64tov2f32:
674 %res = bitcast i64 %in to <2 x float>
675 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <1 x i64> vector.
; Expected codegen: `fmov` from an X register to a D register.
679 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
680 ; CHECK-LABEL: test_bitcasti64tov1i64:
681 %res = bitcast i64 %in to <1 x i64>
682 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; Bitcasts a scalar i64 to a <1 x double> vector and returns it.
; Expected codegen: `fmov` from an X register to a D register.
686 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
687 ; CHECK-LABEL: test_bitcasti64tov1f64:
688 %res = bitcast i64 %in to <1 x double>
689 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
690 ret <1 x double> %res
; Negates an <8 x i8> vector (0 - %a), bitcasts the result to <1 x double>, then
; converts it to <1 x i64> with fptosi. The checks require a `neg` on .8b
; immediately followed by `fcvtzs` reading a D register.
693 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
694 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
695 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
696 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
697 %sub.i = sub <8 x i8> zeroinitializer, %a
698 %1 = bitcast <8 x i8> %sub.i to <1 x double>
699 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
700 ret <1 x i64> %vcvt.i
; Negates a <4 x i16> vector (0 - %a), bitcasts the result to <1 x double>, then
; converts it to <1 x i64> with fptosi. The checks require a `neg` on .4h
; immediately followed by `fcvtzs` reading a D register.
703 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
704 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
705 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
706 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
707 %sub.i = sub <4 x i16> zeroinitializer, %a
708 %1 = bitcast <4 x i16> %sub.i to <1 x double>
709 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
710 ret <1 x i64> %vcvt.i
; Negates a <2 x i32> vector (0 - %a), bitcasts the result to <1 x double>, then
; converts it to <1 x i64> with fptosi. The checks require a `neg` on .2s
; immediately followed by `fcvtzs` reading a D register.
713 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
714 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
715 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
716 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
717 %sub.i = sub <2 x i32> zeroinitializer, %a
718 %1 = bitcast <2 x i32> %sub.i to <1 x double>
719 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
720 ret <1 x i64> %vcvt.i
; Negates a <1 x i64> vector (0 - %a), bitcasts the result to <1 x double>, then
; converts it to <1 x i64> with fptosi. The checks require a scalar `neg` on
; D registers immediately followed by `fcvtzs` reading a D register.
723 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
724 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
725 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
726 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
727 %sub.i = sub <1 x i64> zeroinitializer, %a
728 %1 = bitcast <1 x i64> %sub.i to <1 x double>
729 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
730 ret <1 x i64> %vcvt.i
; Negates a <2 x float> vector (written as -0.0 - %a), bitcasts the result to
; <1 x double>, then converts it to <1 x i64> with fptosi. The checks require
; an `fneg` on .2s immediately followed by `fcvtzs` reading a D register.
733 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
734 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
735 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
736 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
737 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
738 %1 = bitcast <2 x float> %sub.i to <1 x double>
739 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
740 ret <1 x i64> %vcvt.i
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts it to <8 x i8> and
; negates that (0 - value). The checks require `scvtf` into a D register
; immediately followed by a `neg` on .8b.
743 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
744 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
745 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
746 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
747 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
748 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
749 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts it to <4 x i16> and
; negates that (0 - value). The checks require `scvtf` into a D register
; immediately followed by a `neg` on .4h.
753 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
754 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
755 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
756 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
757 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
758 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
759 %sub.i = sub <4 x i16> zeroinitializer, %1
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts it to <2 x i32> and
; negates that (0 - value). The checks require `scvtf` into a D register
; immediately followed by a `neg` on .2s.
763 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
764 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
765 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
766 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
767 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
768 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
769 %sub.i = sub <2 x i32> zeroinitializer, %1
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts it back to
; <1 x i64> and negates that (0 - value). The checks require `scvtf` into a
; D register immediately followed by a scalar `neg` on D registers.
773 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
774 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
775 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
776 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
777 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
778 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
779 %sub.i = sub <1 x i64> zeroinitializer, %1
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts it to <2 x float>,
; negates that (written as -0.0 - value) and returns it. The checks require
; `scvtf` into a D register immediately followed by an `fneg` on .2s.
783 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
784 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
785 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
786 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
787 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
788 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
789 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
790 ret <2 x float> %sub.i
793 ; Test inserting a scalar element into lane 0 of an undef vector.
; Inserts a scalar i8 into lane 0 of an undef <8 x i8> vector.
; Expected codegen: `fmov` from a W register to an S register.
794 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
795 ; CHECK-LABEL: scalar_to_vector.v8i8:
796 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
797 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts a scalar i8 into lane 0 of an undef <16 x i8> vector.
; Expected codegen: `fmov` from a W register to an S register.
801 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
802 ; CHECK-LABEL: scalar_to_vector.v16i8:
803 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
804 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts a scalar i16 into lane 0 of an undef <4 x i16> vector.
; Expected codegen: `fmov` from a W register to an S register.
808 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
809 ; CHECK-LABEL: scalar_to_vector.v4i16:
810 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
811 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts a scalar i16 into lane 0 of an undef <8 x i16> vector.
; Expected codegen: `fmov` from a W register to an S register.
815 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
816 ; CHECK-LABEL: scalar_to_vector.v8i16:
817 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
818 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts a scalar i32 into lane 0 of an undef <2 x i32> vector.
; Expected codegen: `fmov` from a W register to an S register.
822 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
823 ; CHECK-LABEL: scalar_to_vector.v2i32:
824 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
825 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts a scalar i32 into lane 0 of an undef <4 x i32> vector.
; Expected codegen: `fmov` from a W register to an S register.
829 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
830 ; CHECK-LABEL: scalar_to_vector.v4i32:
831 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
832 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts a scalar i64 into lane 0 of an undef <2 x i64> vector.
; Expected codegen: `fmov` from an X register to a D register.
836 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
837 ; CHECK-LABEL: scalar_to_vector.v2i64:
838 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
839 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the only lane of a <1 x i8> vector and inserts it into every lane of
; an <8 x i8> vector. Expected codegen: `dup v0.8b, v0.b[0]` (exact registers).
843 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
844 ; CHECK-LABEL: testDUP.v1i8:
845 ; CHECK: dup v0.8b, v0.b[0]
846 %b = extractelement <1 x i8> %a, i32 0
847 %c = insertelement <8 x i8> undef, i8 %b, i32 0
848 %d = insertelement <8 x i8> %c, i8 %b, i32 1
849 %e = insertelement <8 x i8> %d, i8 %b, i32 2
850 %f = insertelement <8 x i8> %e, i8 %b, i32 3
851 %g = insertelement <8 x i8> %f, i8 %b, i32 4
852 %h = insertelement <8 x i8> %g, i8 %b, i32 5
853 %i = insertelement <8 x i8> %h, i8 %b, i32 6
854 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; Extracts the only lane of a <1 x i16> vector and inserts it into every lane of
; an <8 x i16> vector. Expected codegen: `dup v0.8h, v0.h[0]` (exact registers).
858 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
859 ; CHECK-LABEL: testDUP.v1i16:
860 ; CHECK: dup v0.8h, v0.h[0]
861 %b = extractelement <1 x i16> %a, i32 0
862 %c = insertelement <8 x i16> undef, i16 %b, i32 0
863 %d = insertelement <8 x i16> %c, i16 %b, i32 1
864 %e = insertelement <8 x i16> %d, i16 %b, i32 2
865 %f = insertelement <8 x i16> %e, i16 %b, i32 3
866 %g = insertelement <8 x i16> %f, i16 %b, i32 4
867 %h = insertelement <8 x i16> %g, i16 %b, i32 5
868 %i = insertelement <8 x i16> %h, i16 %b, i32 6
869 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; Extracts the only lane of a <1 x i32> vector and inserts it into every lane of
; a <4 x i32> vector. Expected codegen: `dup v0.4s, v0.s[0]` (exact registers).
873 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
874 ; CHECK-LABEL: testDUP.v1i32:
875 ; CHECK: dup v0.4s, v0.s[0]
876 %b = extractelement <1 x i32> %a, i32 0
877 %c = insertelement <4 x i32> undef, i32 %b, i32 0
878 %d = insertelement <4 x i32> %c, i32 %b, i32 1
879 %e = insertelement <4 x i32> %d, i32 %b, i32 2
880 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Builds an <8 x i8> from lanes 0-7 of a <16 x i8> argument, using one
; extractelement/insertelement pair per lane, and returns the result.
884 define <8 x i8> @getl(<16 x i8> %x) #0 {
887 %vecext = extractelement <16 x i8> %x, i32 0
888 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
889 %vecext1 = extractelement <16 x i8> %x, i32 1
890 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
891 %vecext3 = extractelement <16 x i8> %x, i32 2
892 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
893 %vecext5 = extractelement <16 x i8> %x, i32 3
894 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
895 %vecext7 = extractelement <16 x i8> %x, i32 4
896 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
897 %vecext9 = extractelement <16 x i8> %x, i32 5
898 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
899 %vecext11 = extractelement <16 x i8> %x, i32 6
900 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
901 %vecext13 = extractelement <16 x i8> %x, i32 7
902 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
903 ret <8 x i8> %vecinit14
; Extracts lane 1 of a <2 x i32> vector, truncates it to i16 and splats it
; across a <4 x i16> result. Expected codegen: `dup v0.4h, v0.h[2]`, i.e. the
; truncation is folded into the choice of halfword lane.
906 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
907 ; CHECK-LABEL: test_dup_v2i32_v4i16:
908 ; CHECK: dup v0.4h, v0.h[2]
910 %x = extractelement <2 x i32> %a, i32 1
911 %vget_lane = trunc i32 %x to i16
912 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
913 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
914 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
915 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
916 ret <4 x i16> %vecinit3.i
; Extracts lane 3 of a <4 x i32> vector, truncates it to i16 and splats it
; across an <8 x i16> result. Expected codegen: `dup v0.8h, v0.h[6]`, i.e. the
; truncation is folded into the choice of halfword lane.
919 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
920 ; CHECK-LABEL: test_dup_v4i32_v8i16:
921 ; CHECK: dup v0.8h, v0.h[6]
923 %x = extractelement <4 x i32> %a, i32 3
924 %vget_lane = trunc i32 %x to i16
925 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
926 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
927 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
928 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
929 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
930 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
931 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
932 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
933 ret <8 x i16> %vecinit7.i
; Splat of a truncated lane: the single element of a <1 x i64> is truncated
; to i16 and written to all four lanes of a <4 x i16>. The expected output
; duplicates 16-bit lane 0 of the source register.
936 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
937 ; CHECK-LABEL: test_dup_v1i64_v4i16:
938 ; CHECK: dup v0.4h, v0.h[0]
940 %x = extractelement <1 x i64> %a, i32 0
941 %vget_lane = trunc i64 %x to i16
942 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
943 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
944 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
945 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
946 ret <4 x i16> %vecinit3.i
; Splat of a truncated lane: the single element of a <1 x i64> is truncated
; to i32 and written to both lanes of a <2 x i32>. The expected output
; duplicates 32-bit lane 0 of the source register.
949 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
950 ; CHECK-LABEL: test_dup_v1i64_v2i32:
951 ; CHECK: dup v0.2s, v0.s[0]
953 %x = extractelement <1 x i64> %a, i32 0
954 %vget_lane = trunc i64 %x to i32
955 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
956 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
957 ret <2 x i32> %vecinit1.i
; Splat of a truncated lane: lane 1 of a <2 x i64> is truncated to i16 and
; written to all eight lanes of an <8 x i16>. The expected output duplicates
; 16-bit lane 4 of the source register (the lowest 16 bits of 64-bit lane 1
; on this little-endian target).
960 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
961 ; CHECK-LABEL: test_dup_v2i64_v8i16:
962 ; CHECK: dup v0.8h, v0.h[4]
964 %x = extractelement <2 x i64> %a, i32 1
965 %vget_lane = trunc i64 %x to i16
966 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
967 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
968 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
969 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
970 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
971 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
972 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
973 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
974 ret <8 x i16> %vecinit7.i
; Splat of a truncated lane: lane 1 of a <2 x i64> is truncated to i32 and
; written to all four lanes of a <4 x i32>. The expected output duplicates
; 32-bit lane 2 of the source register (the low half of 64-bit lane 1 on this
; little-endian target).
977 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
978 ; CHECK-LABEL: test_dup_v2i64_v4i32:
979 ; CHECK: dup v0.4s, v0.s[2]
981 %x = extractelement <2 x i64> %a, i32 1
982 %vget_lane = trunc i64 %x to i32
983 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
984 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
985 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
986 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
987 ret <4 x i32> %vecinit3.i
; Splat from a 128-bit source into a 64-bit result: lane 1 of a <4 x i32> is
; truncated to i16 and written to all four lanes of a <4 x i16>. The expected
; output duplicates 16-bit lane 2 of the source register.
990 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
991 ; CHECK-LABEL: test_dup_v4i32_v4i16:
992 ; CHECK: dup v0.4h, v0.h[2]
994 %x = extractelement <4 x i32> %a, i32 1
995 %vget_lane = trunc i32 %x to i16
996 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
997 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
998 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
999 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1000 ret <4 x i16> %vecinit3.i
; Splat from a 128-bit source into a 64-bit result: lane 0 of a <2 x i64> is
; truncated to i16 and written to all four lanes of a <4 x i16>. The expected
; output duplicates 16-bit lane 0 of the source register.
1003 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1004 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1005 ; CHECK: dup v0.4h, v0.h[0]
1007 %x = extractelement <2 x i64> %a, i32 0
1008 %vget_lane = trunc i64 %x to i16
1009 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1010 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1011 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1012 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1013 ret <4 x i16> %vecinit3.i
; Splat from a 128-bit source into a 64-bit result: lane 0 of a <2 x i64> is
; truncated to i32 and written to both lanes of a <2 x i32>. The expected
; output duplicates 32-bit lane 0 of the source register.
1016 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1017 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1018 ; CHECK: dup v0.2s, v0.s[0]
1020 %x = extractelement <2 x i64> %a, i32 0
1021 %vget_lane = trunc i64 %x to i32
1022 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1023 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1024 ret <2 x i32> %vecinit1.i
; Scalar-to-vector with a float produced by an intrinsic: the fmaxv result is
; round-tripped through a <1 x float> (insert then extract of lane 0) and then
; placed in lane 0 of an undef <2 x float>; lane 1 stays undef. The expected
; output contains a pairwise fmaxp on the .2s source writing a scalar S
; register.
1028 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1029 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1030 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1033 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1034 %1 = insertelement <1 x float> undef, float %0, i32 0
1035 %2 = extractelement <1 x float> %1, i32 0
1036 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1037 ret <2 x float> %vecinit1.i
; Same shape as the v2f32 variant, but the scalar lands in lane 0 of an undef
; <4 x float>: the fmaxv result is round-tripped through a <1 x float> and
; inserted at lane 0; the remaining lanes stay undef. The expected output
; contains a pairwise fmaxp on the .2s source writing a scalar S register.
1040 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1041 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1042 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1045 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1046 %1 = insertelement <1 x float> undef, float %0, i32 0
1047 %2 = extractelement <1 x float> %1, i32 0
1048 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1049 ret <4 x float> %vecinit1.i
1052 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; Undef low lane, defined high lane: lane 0 of %a is inserted at lane 1 of an
; undef <2 x i32>, leaving lane 0 undef. The expected output is a dup of
; 32-bit lane 0 across a .2s register.
1054 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1055 ; CHECK-LABEL: test_concat_undef_v1i32:
1056 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1058 %0 = extractelement <2 x i32> %a, i32 0
1059 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1060 ret <2 x i32> %vecinit1.i
1063 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; Defined low lane, undef high lane: the scalar sqabs intrinsic result is
; inserted at lane 0 of an undef <2 x i32>; lane 1 stays undef. The expected
; output contains a scalar sqabs operating on S registers.
1065 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1066 ; CHECK-LABEL: test_concat_v1i32_undef:
1067 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1070 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1071 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1072 ret <2 x i32> %vecinit.i432
; Same scalar in both lanes: lane 0 of %a is inserted at lanes 0 and 1 of a
; <2 x i32>. The expected output is a dup of 32-bit lane 0 across a .2s
; register.
1075 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1076 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1077 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1079 %0 = extractelement <2 x i32> %a, i32 0
1080 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1081 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1082 ret <2 x i32> %vecinit1.i
; Two different scalars combined: each argument goes through the scalar sqabs
; intrinsic and is placed in lane 0 of its own undef <2 x i32>; the shuffle
; mask <0, 2> then picks lane 0 of each vector. The expected output is two
; scalar sqabs instructions, the second immediately followed by a zip1 on
; .2s registers.
1085 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1086 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1087 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1088 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1089 ; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1091 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1092 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1093 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1094 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1095 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; Concatenates the low halves of two 128-bit vectors: the shuffle mask selects
; lanes 0-7 of %x followed by lanes 0-7 of %y (indices 16-23). The expected
; output is an ins copying d[0] of one register into d[1] of another.
1099 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1100 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1101 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1103 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1104 ret <16 x i8> %vecinit30
; Concatenates a 64-bit vector with the low half of a 128-bit vector. All
; eight lanes of %x are copied lane by lane into lanes 0-7 of a <16 x i8>,
; then a shuffle keeps those lanes and appends lanes 0-7 of %y (indices
; 16-23). The expected output is an ins copying d[0] of one register into
; d[1] of another.
1107 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1108 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1109 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1111 %vecext = extractelement <8 x i8> %x, i32 0
1112 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1113 %vecext1 = extractelement <8 x i8> %x, i32 1
1114 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1115 %vecext3 = extractelement <8 x i8> %x, i32 2
1116 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1117 %vecext5 = extractelement <8 x i8> %x, i32 3
1118 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1119 %vecext7 = extractelement <8 x i8> %x, i32 4
1120 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1121 %vecext9 = extractelement <8 x i8> %x, i32 5
1122 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1123 %vecext11 = extractelement <8 x i8> %x, i32 6
1124 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1125 %vecext13 = extractelement <8 x i8> %x, i32 7
1126 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; Low eight lanes come from the vector built above, high eight from %y.
1127 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1128 ret <16 x i8> %vecinit30
; Concatenates the low half of a 128-bit vector with a 64-bit vector, written
; entirely as extract/insert pairs: lanes 0-7 of %x fill result lanes 0-7 and
; lanes 0-7 of %y fill result lanes 8-15. The expected output is an ins
; copying d[0] of one register into d[1] of another.
1131 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1132 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1133 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Result lanes 0-7: copied from the same lanes of %x.
1135 %vecext = extractelement <16 x i8> %x, i32 0
1136 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1137 %vecext1 = extractelement <16 x i8> %x, i32 1
1138 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1139 %vecext3 = extractelement <16 x i8> %x, i32 2
1140 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1141 %vecext5 = extractelement <16 x i8> %x, i32 3
1142 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1143 %vecext7 = extractelement <16 x i8> %x, i32 4
1144 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1145 %vecext9 = extractelement <16 x i8> %x, i32 5
1146 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1147 %vecext11 = extractelement <16 x i8> %x, i32 6
1148 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1149 %vecext13 = extractelement <16 x i8> %x, i32 7
1150 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; Result lanes 8-15: copied from lanes 0-7 of %y.
1151 %vecext15 = extractelement <8 x i8> %y, i32 0
1152 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1153 %vecext17 = extractelement <8 x i8> %y, i32 1
1154 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1155 %vecext19 = extractelement <8 x i8> %y, i32 2
1156 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1157 %vecext21 = extractelement <8 x i8> %y, i32 3
1158 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1159 %vecext23 = extractelement <8 x i8> %y, i32 4
1160 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1161 %vecext25 = extractelement <8 x i8> %y, i32 5
1162 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1163 %vecext27 = extractelement <8 x i8> %y, i32 6
1164 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1165 %vecext29 = extractelement <8 x i8> %y, i32 7
1166 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1167 ret <16 x i8> %vecinit30
; Concatenates two 64-bit vectors into a 128-bit vector, written entirely as
; extract/insert pairs: the eight lanes of %x fill result lanes 0-7 and the
; eight lanes of %y fill result lanes 8-15. The expected output is an ins
; copying d[0] of one register into d[1] of another.
1170 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1171 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1172 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Result lanes 0-7: copied from %x.
1174 %vecext = extractelement <8 x i8> %x, i32 0
1175 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1176 %vecext1 = extractelement <8 x i8> %x, i32 1
1177 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1178 %vecext3 = extractelement <8 x i8> %x, i32 2
1179 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1180 %vecext5 = extractelement <8 x i8> %x, i32 3
1181 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1182 %vecext7 = extractelement <8 x i8> %x, i32 4
1183 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1184 %vecext9 = extractelement <8 x i8> %x, i32 5
1185 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1186 %vecext11 = extractelement <8 x i8> %x, i32 6
1187 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1188 %vecext13 = extractelement <8 x i8> %x, i32 7
1189 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
; Result lanes 8-15: copied from %y.
1190 %vecext15 = extractelement <8 x i8> %y, i32 0
1191 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1192 %vecext17 = extractelement <8 x i8> %y, i32 1
1193 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1194 %vecext19 = extractelement <8 x i8> %y, i32 2
1195 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1196 %vecext21 = extractelement <8 x i8> %y, i32 3
1197 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1198 %vecext23 = extractelement <8 x i8> %y, i32 4
1199 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1200 %vecext25 = extractelement <8 x i8> %y, i32 5
1201 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1202 %vecext27 = extractelement <8 x i8> %y, i32 6
1203 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1204 %vecext29 = extractelement <8 x i8> %y, i32 7
1205 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1206 ret <16 x i8> %vecinit30
; Concatenates the low halves of two 128-bit vectors: the shuffle mask selects
; lanes 0-3 of %x followed by lanes 0-3 of %y (indices 8-11). The expected
; output is an ins copying d[0] of one register into d[1] of another.
1209 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1210 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1211 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1213 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1214 ret <8 x i16> %vecinit14
; Concatenates a 64-bit vector with the low half of a 128-bit vector. The
; four lanes of %x are copied lane by lane into lanes 0-3 of an <8 x i16>,
; then a shuffle keeps those lanes and appends lanes 0-3 of %y (indices
; 8-11). The expected output is an ins copying d[0] of one register into
; d[1] of another.
1217 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1218 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1219 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1221 %vecext = extractelement <4 x i16> %x, i32 0
1222 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1223 %vecext1 = extractelement <4 x i16> %x, i32 1
1224 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1225 %vecext3 = extractelement <4 x i16> %x, i32 2
1226 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1227 %vecext5 = extractelement <4 x i16> %x, i32 3
1228 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; Low four lanes come from the vector built above, high four from %y.
1229 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1230 ret <8 x i16> %vecinit14
; Concatenates the low half of a 128-bit vector with a 64-bit vector, written
; entirely as extract/insert pairs: lanes 0-3 of %x fill result lanes 0-3 and
; lanes 0-3 of %y fill result lanes 4-7. The expected output is an ins copying
; d[0] of one register into d[1] of another.
1233 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1234 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1235 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Result lanes 0-3: copied from the same lanes of %x.
1237 %vecext = extractelement <8 x i16> %x, i32 0
1238 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1239 %vecext1 = extractelement <8 x i16> %x, i32 1
1240 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1241 %vecext3 = extractelement <8 x i16> %x, i32 2
1242 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1243 %vecext5 = extractelement <8 x i16> %x, i32 3
1244 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; Result lanes 4-7: copied from lanes 0-3 of %y.
1245 %vecext7 = extractelement <4 x i16> %y, i32 0
1246 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1247 %vecext9 = extractelement <4 x i16> %y, i32 1
1248 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1249 %vecext11 = extractelement <4 x i16> %y, i32 2
1250 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1251 %vecext13 = extractelement <4 x i16> %y, i32 3
1252 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1253 ret <8 x i16> %vecinit14
; Concatenates two 64-bit vectors into a 128-bit vector, written entirely as
; extract/insert pairs: the four lanes of %x fill result lanes 0-3 and the
; four lanes of %y fill result lanes 4-7. The expected output is an ins
; copying d[0] of one register into d[1] of another.
1256 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1257 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1258 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
; Result lanes 0-3: copied from %x.
1260 %vecext = extractelement <4 x i16> %x, i32 0
1261 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1262 %vecext1 = extractelement <4 x i16> %x, i32 1
1263 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1264 %vecext3 = extractelement <4 x i16> %x, i32 2
1265 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1266 %vecext5 = extractelement <4 x i16> %x, i32 3
1267 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
; Result lanes 4-7: copied from %y.
1268 %vecext7 = extractelement <4 x i16> %y, i32 0
1269 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1270 %vecext9 = extractelement <4 x i16> %y, i32 1
1271 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1272 %vecext11 = extractelement <4 x i16> %y, i32 2
1273 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1274 %vecext13 = extractelement <4 x i16> %y, i32 3
1275 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1276 ret <8 x i16> %vecinit14
; Concatenates the low halves of two 128-bit vectors: the shuffle mask selects
; lanes 0-1 of %x followed by lanes 0-1 of %y (indices 4-5). The expected
; output is an ins copying d[0] of one register into d[1] of another.
1279 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1280 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1281 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1283 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1284 ret <4 x i32> %vecinit6
; Concatenates a 64-bit vector with the low half of a 128-bit vector. Both
; lanes of %x are copied into lanes 0-1 of a <4 x i32>, then a shuffle keeps
; those lanes and appends lanes 0-1 of %y (indices 4-5). The expected output
; is an ins copying d[0] of one register into d[1] of another.
1287 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1288 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1289 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1291 %vecext = extractelement <2 x i32> %x, i32 0
1292 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1293 %vecext1 = extractelement <2 x i32> %x, i32 1
1294 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1295 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1296 ret <4 x i32> %vecinit6
; Concatenates the low half of a 128-bit vector with a 64-bit vector, written
; as extract/insert pairs: lanes 0-1 of %x fill result lanes 0-1 and lanes 0-1
; of %y fill result lanes 2-3. The expected output is an ins copying d[0] of
; one register into d[1] of another.
1299 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1300 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1301 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1303 %vecext = extractelement <4 x i32> %x, i32 0
1304 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1305 %vecext1 = extractelement <4 x i32> %x, i32 1
1306 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1307 %vecext3 = extractelement <2 x i32> %y, i32 0
1308 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1309 %vecext5 = extractelement <2 x i32> %y, i32 1
1310 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1311 ret <4 x i32> %vecinit6
; Concatenates two 64-bit vectors with a single widening shuffle: the mask
; <0, 1, 2, 3> takes both lanes of %x followed by both lanes of %y. The
; expected output is an ins copying d[0] of one register into d[1] of another.
1314 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1315 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1316 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1318 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1319 ret <4 x i32> %vecinit6
; Combines lane 0 of %x with lane 0 of %y (shuffle mask <0, 2>) into a
; <2 x i64>. Unlike the narrower-element concat tests, the expected output
; here is a zip1 on .2d registers.
1322 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1323 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1324 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1326 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1327 ret <2 x i64> %vecinit2
; Combines a <1 x i64> with lane 0 of a <2 x i64>: the single element of %x is
; placed in lane 0 of an undef <2 x i64>, then the shuffle mask <0, 2> pairs
; it with lane 0 of %y. The expected output is a zip1 on .2d registers.
1330 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1331 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1332 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1334 %vecext = extractelement <1 x i64> %x, i32 0
1335 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1336 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1337 ret <2 x i64> %vecinit2
; Combines lane 0 of a <2 x i64> with a <1 x i64> using extract/insert pairs:
; lane 0 of %x goes to result lane 0 and the single element of %y goes to
; result lane 1. The expected output is an ins copying d[0] of one register
; into d[1] of another.
1340 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1341 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1342 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1344 %vecext = extractelement <2 x i64> %x, i32 0
1345 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1346 %vecext1 = extractelement <1 x i64> %y, i32 0
1347 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1348 ret <2 x i64> %vecinit2
; Concatenates two <1 x i64> vectors using extract/insert pairs: the element
; of %x goes to result lane 0 and the element of %y to result lane 1. The
; expected output is an ins copying d[0] of one register into d[1] of another.
1351 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1352 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1353 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1355 %vecext = extractelement <1 x i64> %x, i32 0
1356 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1357 %vecext1 = extractelement <1 x i64> %y, i32 0
1358 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1359 ret <2 x i64> %vecinit2
; Constant splat: an all-zero shuffle mask broadcasts the single lane of a
; zero <1 x i16> into four lanes. The expected output materialises the value
; with a movi of #0 into a D register.
1363 define <4 x i16> @concat_vector_v4i16_const() {
1364 ; CHECK-LABEL: concat_vector_v4i16_const:
1365 ; CHECK: movi {{d[0-9]+}}, #0
1366 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Constant splat of a non-zero value: an all-zero shuffle mask broadcasts the
; single lane of <i16 1> into four lanes. The expected output is a movi of
; #0x1 into a .4h register.
1370 define <4 x i16> @concat_vector_v4i16_const_one() {
1371 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1372 ; CHECK: movi {{v[0-9]+}}.4h, #0x1
1373 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Constant splat: an all-zero shuffle mask broadcasts the single lane of a
; zero <1 x i32> into four lanes. The expected output is a movi of #0 into a
; .2d register.
1377 define <4 x i32> @concat_vector_v4i32_const() {
1378 ; CHECK-LABEL: concat_vector_v4i32_const:
1379 ; CHECK: movi {{v[0-9]+}}.2d, #0
1380 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Constant splat: an all-zero shuffle mask broadcasts the single lane of a
; zero <1 x i8> into eight lanes. The expected output is a movi of #0 into a
; D register.
1384 define <8 x i8> @concat_vector_v8i8_const() {
1385 ; CHECK-LABEL: concat_vector_v8i8_const:
1386 ; CHECK: movi {{d[0-9]+}}, #0
1387 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Constant splat: an all-zero shuffle mask broadcasts the single lane of a
; zero <1 x i16> into eight lanes. The expected output is a movi of #0 into a
; .2d register.
1391 define <8 x i16> @concat_vector_v8i16_const() {
1392 ; CHECK-LABEL: concat_vector_v8i16_const:
1393 ; CHECK: movi {{v[0-9]+}}.2d, #0
1394 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Constant splat of a non-zero value: an all-zero shuffle mask broadcasts the
; single lane of <i16 1> into eight lanes. The expected output is a movi of
; #0x1 into a .8h register.
1398 define <8 x i16> @concat_vector_v8i16_const_one() {
1399 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1400 ; CHECK: movi {{v[0-9]+}}.8h, #0x1
1401 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Constant splat: an all-zero shuffle mask broadcasts the single lane of a
; zero <1 x i8> into sixteen lanes. The expected output is a movi of #0 into
; a .2d register.
1405 define <16 x i8> @concat_vector_v16i8_const() {
1406 ; CHECK-LABEL: concat_vector_v16i8_const:
1407 ; CHECK: movi {{v[0-9]+}}.2d, #0
1408 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splat of a one-element vector argument: an all-zero shuffle mask broadcasts
; the single lane of the <1 x i16> %a into four lanes. The expected output is
; a dup of 16-bit lane 0 of v0 into v0.4h.
1412 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1413 ; CHECK-LABEL: concat_vector_v4i16:
1414 ; CHECK: dup v0.4h, v0.h[0]
1415 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splat of a one-element vector argument: an all-zero shuffle mask broadcasts
; the single lane of the <1 x i32> %a into four lanes. The expected output is
; a dup of 32-bit lane 0 of v0 into v0.4s.
1419 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1420 ; CHECK-LABEL: concat_vector_v4i32:
1421 ; CHECK: dup v0.4s, v0.s[0]
1422 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splat of a one-element vector argument: an all-zero shuffle mask broadcasts
; the single lane of the <1 x i8> %a into eight lanes. The expected output is
; a dup of 8-bit lane 0 of v0 into v0.8b.
1426 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1427 ; CHECK-LABEL: concat_vector_v8i8:
1428 ; CHECK: dup v0.8b, v0.b[0]
1429 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splat of a one-element vector argument: an all-zero shuffle mask broadcasts
; the single lane of the <1 x i16> %a into eight lanes. The expected output is
; a dup of 16-bit lane 0 of v0 into v0.8h.
1433 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1434 ; CHECK-LABEL: concat_vector_v8i16:
1435 ; CHECK: dup v0.8h, v0.h[0]
1436 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1440 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1441 ; CHECK-LABEL: concat_vector_v16i8:
1442 ; CHECK: dup v0.16b, v0.b[0]
1443 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer