1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Insert scalar i8 %tmp2 into lane 15 of a <16 x i8>; expects INS into .b[15] from a W register.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
6 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Insert scalar i16 %tmp2 into lane 6 of an <8 x i16>; expects INS into .h[6] from a W register.
10 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
11 ;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
12 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Insert scalar i32 %tmp2 into lane 2 of a <4 x i32>; expects INS into .s[2] from a W register.
16 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
17 ;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
18 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Insert scalar i64 %tmp2 into lane 1 of a <2 x i64>; expects INS into .d[1] from an X register.
22 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
23 ;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
24 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Insert scalar i8 %tmp2 into lane 5 of an <8 x i8>; expects INS into .b[5] from a W register.
28 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
29 ;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
30 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Insert scalar i16 %tmp2 into lane 3 of a <4 x i16>; expects INS into .h[3] from a W register.
34 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
35 ;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
36 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Insert scalar i32 %tmp2 into lane 1 of a <2 x i32>; expects INS into .s[1] from a W register.
40 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
41 ;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
42 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copy lane 2 of %tmp1 into lane 15 of %tmp2 (<16 x i8>); expects an element-to-element INS .b[15] <- .b[2].
46 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
47 ;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
48 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
49 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copy lane 2 of %tmp1 into lane 7 of %tmp2 (<8 x i16>); expects an element-to-element INS .h[7] <- .h[2].
53 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
54 ;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
55 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
56 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copy lane 2 of %tmp1 into lane 1 of %tmp2 (<4 x i32>); expects an element-to-element INS .s[1] <- .s[2].
60 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
61 ;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
62 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
63 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copy lane 0 of %tmp1 into lane 1 of %tmp2 (<2 x i64>); expects an element-to-element INS .d[1] <- .d[0].
67 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
68 ;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
69 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
70 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copy lane 2 of %tmp1 into lane 4 of %tmp2 (<8 x i8>); expects an element-to-element INS .b[4] <- .b[2].
74 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
75 ;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
76 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
77 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copy lane 2 of %tmp1 into lane 3 of %tmp2 (<4 x i16>); expects an element-to-element INS .h[3] <- .h[2].
81 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
82 ;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
83 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
84 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copy lane 0 of %tmp1 into lane 1 of %tmp2 (<2 x i32>); expects an element-to-element INS .s[1] <- .s[0].
88 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
89 ;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
90 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
91 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copy the single lane of %tmp1 into the single lane of %tmp2 (<1 x i64>); expects INS .d[0] <- .d[0].
95 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
96 ;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
97 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
98 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Extract lane 8 of a <16 x i8> and zero-extend to i32; expects UMOV to a W register from .b[8].
102 define i32 @umovw16b(<16 x i8> %tmp1) {
103 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
104 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
105 %tmp4 = zext i8 %tmp3 to i32
; Extract lane 2 of an <8 x i16> and zero-extend to i32; expects UMOV to a W register from .h[2].
109 define i32 @umovw8h(<8 x i16> %tmp1) {
110 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
111 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
112 %tmp4 = zext i16 %tmp3 to i32
; Extract lane 2 of a <4 x i32>; expects UMOV to a W register from .s[2].
116 define i32 @umovw4s(<4 x i32> %tmp1) {
117 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
118 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extract lane 0 of a <2 x i64>; expects UMOV to an X register from .d[0].
122 define i64 @umovx2d(<2 x i64> %tmp1) {
123 ;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
124 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
; Extract lane 7 of an <8 x i8> and zero-extend to i32; expects UMOV to a W register from .b[7].
128 define i32 @umovw8b(<8 x i8> %tmp1) {
129 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
130 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
131 %tmp4 = zext i8 %tmp3 to i32
; Extract lane 2 of a <4 x i16> and zero-extend to i32; expects UMOV to a W register from .h[2].
135 define i32 @umovw4h(<4 x i16> %tmp1) {
136 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
137 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
138 %tmp4 = zext i16 %tmp3 to i32
; Extract lane 1 of a <2 x i32>; expects UMOV to a W register from .s[1].
142 define i32 @umovw2s(<2 x i32> %tmp1) {
143 ;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
144 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extract the single lane of a <1 x i64>; expects a plain FMOV from a D register to an X register.
148 define i64 @umovx1d(<1 x i64> %tmp1) {
149 ;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
150 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extract lane 8 of a <16 x i8>, sign-extend to i32 and add 5; expects SMOV to a W register from .b[8].
154 define i32 @smovw16b(<16 x i8> %tmp1) {
155 ;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
156 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
157 %tmp4 = sext i8 %tmp3 to i32
158 %tmp5 = add i32 5, %tmp4
; Extract lane 2 of an <8 x i16>, sign-extend to i32 and add 5; expects SMOV to a W register from .h[2].
162 define i32 @smovw8h(<8 x i16> %tmp1) {
163 ;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
164 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
165 %tmp4 = sext i16 %tmp3 to i32
166 %tmp5 = add i32 5, %tmp4
; Extract lane 8 of a <16 x i8> and sign-extend it; expects SMOV to an X register from .b[8].
; NOTE(review): the visible IR sign-extends only to i32 while an X-register destination is expected -
; confirm whether an i64 result was intended.
170 define i32 @smovx16b(<16 x i8> %tmp1) {
171 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
172 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
173 %tmp4 = sext i8 %tmp3 to i32
; Extract lane 2 of an <8 x i16> and sign-extend it; expects SMOV to an X register from .h[2].
; NOTE(review): the visible IR sign-extends only to i32 while an X-register destination is expected -
; confirm whether an i64 result was intended.
177 define i32 @smovx8h(<8 x i16> %tmp1) {
178 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
179 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
180 %tmp4 = sext i16 %tmp3 to i32
; Extract lane 2 of a <4 x i32> and sign-extend to i64; expects SMOV to an X register from .s[2].
184 define i64 @smovx4s(<4 x i32> %tmp1) {
185 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
186 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
187 %tmp4 = sext i32 %tmp3 to i64
; Extract lane 4 of an <8 x i8>, sign-extend to i32 and add 5; expects SMOV to a W register from .b[4].
191 define i32 @smovw8b(<8 x i8> %tmp1) {
192 ;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
193 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
194 %tmp4 = sext i8 %tmp3 to i32
195 %tmp5 = add i32 5, %tmp4
; Extract lane 2 of a <4 x i16>, sign-extend to i32 and add 5; expects SMOV to a W register from .h[2].
199 define i32 @smovw4h(<4 x i16> %tmp1) {
200 ;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
201 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
202 %tmp4 = sext i16 %tmp3 to i32
203 %tmp5 = add i32 5, %tmp4
; Extract lane 6 of an <8 x i8> and sign-extend it; expects SMOV to an X register from .b[6].
; NOTE(review): the visible IR sign-extends only to i32 while an X-register destination is expected -
; confirm whether an i64 result was intended.
207 define i32 @smovx8b(<8 x i8> %tmp1) {
208 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6]
209 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
210 %tmp4 = sext i8 %tmp3 to i32
; Extract lane 2 of a <4 x i16> and sign-extend it; expects SMOV to an X register from .h[2].
; NOTE(review): the visible IR sign-extends only to i32 while an X-register destination is expected -
; confirm whether an i64 result was intended.
214 define i32 @smovx4h(<4 x i16> %tmp1) {
215 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
216 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
217 %tmp4 = sext i16 %tmp3 to i32
; Extract lane 1 of a <2 x i32> and sign-extend to i64; expects SMOV to an X register from .s[1].
221 define i64 @smovx2s(<2 x i32> %tmp1) {
222 ;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
223 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
224 %tmp4 = sext i32 %tmp3 to i64
; Shuffle that keeps %v1 except for result lane 5, which takes mask index 11 (lane 3 of %v2);
; expects a single element-to-element INS .b[5] <- .b[3].
228 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
229 ;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
230 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
231 ret <8 x i8> %vset_lane
; Shuffle that keeps %v1 except for result lane 14, which takes mask index 22 (lane 6 of %v2);
; expects a single element-to-element INS .b[14] <- .b[6].
234 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
235 ;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
236 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
237 ret <16 x i8> %vset_lane
; Operand-swapped variant: result lanes 0-6 come from %v2 (mask indices 8-14) and lane 7 is
; lane 0 of %v1; expects a single INS .b[7] <- .b[0].
240 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
241 ;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
242 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
243 ret <8 x i8> %vset_lane
; Operand-swapped variant: result lane 0 is lane 15 of %v1 and lanes 1-15 come from %v2
; (mask indices 17-31); expects a single INS .b[0] <- .b[15].
246 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
247 ;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
248 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
249 ret <16 x i8> %vset_lane
; Build an <8 x i8> by inserting the same scalar %v1 into every lane (0-7);
; expects the insert chain to collapse into one DUP .8b from a W register.
252 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
253 ;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
254 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
255 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
256 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
257 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
258 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
259 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
260 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
261 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
262 ret <8 x i8> %vecinit7.i
; Build a <4 x i16> by inserting the same scalar %v1 into every lane (0-3);
; expects the insert chain to collapse into one DUP .4h from a W register.
265 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
266 ;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
267 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
268 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
269 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
270 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
271 ret <4 x i16> %vecinit3.i
; Build a <2 x i32> by inserting the same scalar %v1 into both lanes;
; expects one DUP .2s from a W register.
274 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
275 ;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
276 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
277 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
278 ret <2 x i32> %vecinit1.i
; Build a <1 x i64> from scalar %v1 (single lane, so no DUP is expected);
; expects a plain FMOV from an X register to a D register.
281 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
282 ;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
283 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
284 ret <1 x i64> %vecinit.i
; Build a <16 x i8> by inserting the same scalar %v1 into every lane (0-15);
; expects the insert chain to collapse into one DUP .16b from a W register.
287 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
288 ;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
289 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
290 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
291 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
292 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
293 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
294 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
295 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
296 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
297 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
298 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
299 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
300 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
301 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
302 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
303 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
304 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
305 ret <16 x i8> %vecinit15.i
; Build an <8 x i16> by inserting the same scalar %v1 into every lane (0-7);
; expects the insert chain to collapse into one DUP .8h from a W register.
308 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
309 ;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
310 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
311 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
312 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
313 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
314 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
315 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
316 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
317 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
318 ret <8 x i16> %vecinit7.i
; Build a <4 x i32> by inserting the same scalar %v1 into every lane (0-3);
; expects the insert chain to collapse into one DUP .4s from a W register.
321 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
322 ;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
323 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
324 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
325 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
326 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
327 ret <4 x i32> %vecinit3.i
; Build a <2 x i64> by inserting the same scalar %v1 into both lanes;
; expects one DUP .2d from an X register.
330 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
331 ;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
332 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
333 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
334 ret <2 x i64> %vecinit1.i
; Splat lane 5 of an <8 x i8> across an <8 x i8> result; expects DUP .8b from element .b[5].
337 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
338 ;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
339 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
340 ret <8 x i8> %shuffle
; Splat lane 2 of a <4 x i16> across a <4 x i16> result; expects DUP .4h from element .h[2].
343 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
344 ;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
345 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
346 ret <4 x i16> %shuffle
; Splat lane 1 of a <2 x i32> across a <2 x i32> result; expects DUP .2s from element .s[1].
349 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
350 ;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
351 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
352 ret <2 x i32> %shuffle
; Splat lane 5 of an <8 x i8> across a <16 x i8> result; expects DUP .16b from element .b[5].
; The mnemonic is part of the pattern so that another instruction with the same operands cannot satisfy it.
355 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
356 ;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
357 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
358 ret <16 x i8> %shuffle
; Splat lane 2 of a <4 x i16> across an <8 x i16> result; expects DUP .8h from element .h[2].
; The mnemonic is part of the pattern so that another instruction with the same operands cannot satisfy it.
361 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
362 ;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
363 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
364 ret <8 x i16> %shuffle
; Splat lane 1 of a <2 x i32> across a <4 x i32> result; expects DUP .4s from element .s[1].
; The mnemonic is part of the pattern so that another instruction with the same operands cannot satisfy it.
367 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
368 ;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
369 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
370 ret <4 x i32> %shuffle
; Splat the single lane of a <1 x i64> across a <2 x i64> result; expects DUP .2d from element .d[0].
; The mnemonic is part of the pattern so that another instruction with the same operands cannot satisfy it.
373 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
374 ;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
375 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
376 ret <2 x i64> %shuffle
; Splat lane 5 of a <16 x i8> across an <8 x i8> result; expects DUP .8b from element .b[5].
379 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
380 ;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
381 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
382 ret <8 x i8> %shuffle
; Splat lane 2 of an <8 x i16> across a <4 x i16> result; expects DUP .4h from element .h[2].
385 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
386 ;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
387 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
388 ret <4 x i16> %shuffle
; Splat lane 1 of a <4 x i32> across a <2 x i32> result; expects DUP .2s from element .s[1].
391 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
392 ;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
393 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
394 ret <2 x i32> %shuffle
; Splat lane 5 of a <16 x i8> across a <16 x i8> result; expects DUP .16b from element .b[5].
397 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
398 ;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
399 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
400 ret <16 x i8> %shuffle
; Splat lane 2 of an <8 x i16> across an <8 x i16> result; expects DUP .8h from element .h[2].
; The mnemonic is part of the pattern so that another instruction with the same operands cannot satisfy it.
403 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
404 ;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
405 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
406 ret <8 x i16> %shuffle
; Splat lane 1 of a <4 x i32> across a <4 x i32> result; expects DUP .4s from element .s[1].
409 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
410 ;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
411 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
412 ret <4 x i32> %shuffle
; Splat lane 0 of a <2 x i64> across a <2 x i64> result (zeroinitializer mask); expects DUP .2d from element .d[0].
415 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
416 ;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
417 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
418 ret <2 x i64> %shuffle