; Codegen test for two-element 128-bit vector shuffles (<2 x i64> and <2 x double>).
; llc is invoked six times below, always with -x86-experimental-vector-shuffle-lowering,
; once per feature level (baseline x86-64, +sse3, +ssse3, +sse4.1, +avx, +avx2).
; Each invocation enables a common prefix, a family prefix and a level-specific prefix,
; so expectations can be shared wherever the generated code is identical.
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Mask <0, 0> puts element 0 of %a in both result lanes; %b is never selected.
; Without AVX2 the expectation is a (v)pshufd splat of the low quadword; the AVX2 run
; expects vpbroadcastq instead.
11 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
12 ; SSE-LABEL: shuffle_v2i64_00:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17 ; AVX1-LABEL: shuffle_v2i64_00:
19 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22 ; AVX2-LABEL: shuffle_v2i64_00:
24 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
26 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
27 ret <2 x i64> %shuffle
; Mask <1, 0> swaps the two lanes of %a; %b is never selected.
; All runs expect one (v)pshufd with the dword pattern [2,3,0,1].
29 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
30 ; SSE-LABEL: shuffle_v2i64_10:
32 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
35 ; AVX-LABEL: shuffle_v2i64_10:
37 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
40 ret <2 x i64> %shuffle
; Mask <1, 1> duplicates element 1 of %a into both lanes; %b is never selected.
; All runs expect one (v)pshufd with the dword pattern [2,3,2,3].
42 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
43 ; SSE-LABEL: shuffle_v2i64_11:
45 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
48 ; AVX-LABEL: shuffle_v2i64_11:
50 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
53 ret <2 x i64> %shuffle
; Mask <2, 2> duplicates element 0 of %b (mask indices 2 and 3 address the second
; shufflevector operand); %a is never selected. The expected instructions read xmm1
; and write xmm0; the AVX2 run expects vpbroadcastq.
55 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
56 ; SSE-LABEL: shuffle_v2i64_22:
58 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
61 ; AVX1-LABEL: shuffle_v2i64_22:
63 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
66 ; AVX2-LABEL: shuffle_v2i64_22:
68 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
70 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
71 ret <2 x i64> %shuffle
; Mask <3, 2> swaps the two lanes of %b; %a is never selected.
; The expected (v)pshufd reads xmm1 and writes xmm0 directly.
73 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
74 ; SSE-LABEL: shuffle_v2i64_32:
76 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
79 ; AVX-LABEL: shuffle_v2i64_32:
81 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
83 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
84 ret <2 x i64> %shuffle
; Mask <3, 3> duplicates element 1 of %b into both lanes; %a is never selected.
; The expected (v)pshufd reads xmm1 and writes xmm0 directly.
86 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
87 ; SSE-LABEL: shuffle_v2i64_33:
89 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
92 ; AVX-LABEL: shuffle_v2i64_33:
94 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
96 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
97 ret <2 x i64> %shuffle
; Floating-point form of the <0, 0> splat: both lanes take element 0 of %a.
; The baseline run expects movlhps, the +sse3/+ssse3/+sse4.1 runs expect unpcklpd,
; and the AVX runs expect vunpcklpd.
100 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
101 ; SSE2-LABEL: shuffle_v2f64_00:
103 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
106 ; SSE3-LABEL: shuffle_v2f64_00:
108 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
111 ; SSSE3-LABEL: shuffle_v2f64_00:
113 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
116 ; SSE41-LABEL: shuffle_v2f64_00:
118 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
121 ; AVX-LABEL: shuffle_v2f64_00:
123 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
125 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
126 ret <2 x double> %shuffle
; Mask <1, 0> swaps the two lanes of %a; %b is never selected.
; Expected as shufpd without AVX and as vpermilpd with AVX.
128 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
129 ; SSE-LABEL: shuffle_v2f64_10:
131 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
134 ; AVX-LABEL: shuffle_v2f64_10:
136 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
138 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
139 ret <2 x double> %shuffle
; Mask <1, 1> duplicates element 1 of %a into both lanes; %b is never selected.
; All runs expect (v)movhlps on xmm0.
141 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
142 ; SSE-LABEL: shuffle_v2f64_11:
144 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
147 ; AVX-LABEL: shuffle_v2f64_11:
149 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
152 ret <2 x double> %shuffle
; Mask <2, 2> duplicates element 0 of %b into both lanes; %a is never selected.
; The non-AVX expectations shuffle xmm1 in place and then copy it into xmm0; the AVX
; expectation writes xmm0 directly from xmm1.
154 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
155 ; SSE2-LABEL: shuffle_v2f64_22:
157 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
158 ; SSE2-NEXT: movaps %xmm1, %xmm0
161 ; SSE3-LABEL: shuffle_v2f64_22:
163 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
164 ; SSE3-NEXT: movapd %xmm1, %xmm0
167 ; SSSE3-LABEL: shuffle_v2f64_22:
169 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
170 ; SSSE3-NEXT: movapd %xmm1, %xmm0
173 ; SSE41-LABEL: shuffle_v2f64_22:
175 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
176 ; SSE41-NEXT: movapd %xmm1, %xmm0
179 ; AVX-LABEL: shuffle_v2f64_22:
181 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
184 ret <2 x double> %shuffle
; Mask <3, 2> swaps the two lanes of %b; %a is never selected.
; Without AVX the expectation is shufpd on xmm1 followed by a copy into xmm0; with AVX
; a single vpermilpd writes xmm0.
186 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
187 ; SSE-LABEL: shuffle_v2f64_32:
189 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
190 ; SSE-NEXT: movapd %xmm1, %xmm0
193 ; AVX-LABEL: shuffle_v2f64_32:
195 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
197 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
198 ret <2 x double> %shuffle
; Mask <3, 3> duplicates element 1 of %b into both lanes; %a is never selected.
; Without AVX the expectation is movhlps on xmm1 followed by a copy into xmm0; with AVX
; a single vmovhlps writes xmm0.
200 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
201 ; SSE-LABEL: shuffle_v2f64_33:
203 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
204 ; SSE-NEXT: movaps %xmm1, %xmm0
207 ; AVX-LABEL: shuffle_v2f64_33:
209 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
211 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
212 ret <2 x double> %shuffle
; Mask <0, 3> is a blend: lane 0 from element 0 of %a, lane 1 from element 1 of %b.
; Below SSE4.1 the expectation is movsd into xmm1 plus a copy to xmm0; from SSE4.1
; upward it is a single (v)blendpd.
214 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
215 ; SSE2-LABEL: shuffle_v2f64_03:
217 ; SSE2-NEXT: movsd %xmm0, %xmm1
218 ; SSE2-NEXT: movaps %xmm1, %xmm0
221 ; SSE3-LABEL: shuffle_v2f64_03:
223 ; SSE3-NEXT: movsd %xmm0, %xmm1
224 ; SSE3-NEXT: movaps %xmm1, %xmm0
227 ; SSSE3-LABEL: shuffle_v2f64_03:
229 ; SSSE3-NEXT: movsd %xmm0, %xmm1
230 ; SSSE3-NEXT: movaps %xmm1, %xmm0
233 ; SSE41-LABEL: shuffle_v2f64_03:
235 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
238 ; AVX-LABEL: shuffle_v2f64_03:
240 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
242 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
243 ret <2 x double> %shuffle
; Mask <2, 1> is the opposite blend: lane 0 from element 0 of %b, lane 1 from element 1
; of %a. Below SSE4.1 a single movsd into xmm0 is expected; the SSE4.1 run expects
; blendpd on xmm1 plus a copy, and the AVX runs expect one vblendpd.
245 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
246 ; SSE2-LABEL: shuffle_v2f64_21:
248 ; SSE2-NEXT: movsd %xmm1, %xmm0
251 ; SSE3-LABEL: shuffle_v2f64_21:
253 ; SSE3-NEXT: movsd %xmm1, %xmm0
256 ; SSSE3-LABEL: shuffle_v2f64_21:
258 ; SSSE3-NEXT: movsd %xmm1, %xmm0
261 ; SSE41-LABEL: shuffle_v2f64_21:
263 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
264 ; SSE41-NEXT: movapd %xmm1, %xmm0
267 ; AVX-LABEL: shuffle_v2f64_21:
269 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
271 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
272 ret <2 x double> %shuffle
; Mask <0, 2> pairs element 0 of %a with element 0 of %b.
; All runs expect one (v)punpcklqdq of xmm0 and xmm1.
276 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
277 ; SSE-LABEL: shuffle_v2i64_02:
279 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
282 ; AVX-LABEL: shuffle_v2i64_02:
284 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
286 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
287 ret <2 x i64> %shuffle
; Same <0, 2> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2 and the result must end up in xmm0:
; the non-AVX form needs an extra movdqa, the AVX form writes xmm0 directly.
; NOTE(review): %nonce presumably exists only to displace %a/%b from xmm0 - confirm.
289 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
290 ; SSE-LABEL: shuffle_v2i64_02_copy:
292 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
293 ; SSE-NEXT: movdqa %xmm1, %xmm0
296 ; AVX-LABEL: shuffle_v2i64_02_copy:
298 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
300 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
301 ret <2 x i64> %shuffle
; Mask <0, 3> is an integer blend: lane 0 from element 0 of %a, lane 1 from element 1
; of %b. Below SSE4.1 the expectation is movsd plus a copy; the SSE4.1 and AVX1 runs
; expect (v)pblendw, and the AVX2 run expects vpblendd.
303 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
304 ; SSE2-LABEL: shuffle_v2i64_03:
306 ; SSE2-NEXT: movsd %xmm0, %xmm1
307 ; SSE2-NEXT: movaps %xmm1, %xmm0
310 ; SSE3-LABEL: shuffle_v2i64_03:
312 ; SSE3-NEXT: movsd %xmm0, %xmm1
313 ; SSE3-NEXT: movaps %xmm1, %xmm0
316 ; SSSE3-LABEL: shuffle_v2i64_03:
318 ; SSSE3-NEXT: movsd %xmm0, %xmm1
319 ; SSSE3-NEXT: movaps %xmm1, %xmm0
322 ; SSE41-LABEL: shuffle_v2i64_03:
324 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
327 ; AVX1-LABEL: shuffle_v2i64_03:
329 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
332 ; AVX2-LABEL: shuffle_v2i64_03:
334 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
336 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
337 ret <2 x i64> %shuffle
; Same <0, 3> blend, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2 and the result is delivered in xmm0,
; via an explicit register copy on every non-AVX run.
339 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
340 ; SSE2-LABEL: shuffle_v2i64_03_copy:
342 ; SSE2-NEXT: movsd %xmm1, %xmm2
343 ; SSE2-NEXT: movaps %xmm2, %xmm0
346 ; SSE3-LABEL: shuffle_v2i64_03_copy:
348 ; SSE3-NEXT: movsd %xmm1, %xmm2
349 ; SSE3-NEXT: movaps %xmm2, %xmm0
352 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
354 ; SSSE3-NEXT: movsd %xmm1, %xmm2
355 ; SSSE3-NEXT: movaps %xmm2, %xmm0
358 ; SSE41-LABEL: shuffle_v2i64_03_copy:
360 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
361 ; SSE41-NEXT: movdqa %xmm1, %xmm0
364 ; AVX1-LABEL: shuffle_v2i64_03_copy:
366 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
369 ; AVX2-LABEL: shuffle_v2i64_03_copy:
371 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
373 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
374 ret <2 x i64> %shuffle
; Mask <1, 2> takes element 1 of %a followed by element 0 of %b.
; The baseline and +sse3 runs expect shufpd; from SSSE3 upward the expectation is
; (v)palignr, with an extra movdqa on the non-AVX runs.
376 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
377 ; SSE2-LABEL: shuffle_v2i64_12:
379 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
382 ; SSE3-LABEL: shuffle_v2i64_12:
384 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
387 ; SSSE3-LABEL: shuffle_v2i64_12:
389 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
390 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
393 ; SSE41-LABEL: shuffle_v2i64_12:
395 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
396 ; SSE41-NEXT: movdqa %xmm1, %xmm0
399 ; AVX-LABEL: shuffle_v2i64_12:
401 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
403 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
404 ret <2 x i64> %shuffle
; Same <1, 2> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2; every non-AVX run ends with a copy
; of the shuffled register into xmm0.
406 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
407 ; SSE2-LABEL: shuffle_v2i64_12_copy:
409 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
410 ; SSE2-NEXT: movapd %xmm1, %xmm0
413 ; SSE3-LABEL: shuffle_v2i64_12_copy:
415 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
416 ; SSE3-NEXT: movapd %xmm1, %xmm0
419 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
421 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
422 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
425 ; SSE41-LABEL: shuffle_v2i64_12_copy:
427 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
428 ; SSE41-NEXT: movdqa %xmm2, %xmm0
431 ; AVX-LABEL: shuffle_v2i64_12_copy:
433 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
435 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
436 ret <2 x i64> %shuffle
; Mask <1, 3> pairs element 1 of %a with element 1 of %b.
; All runs expect one (v)punpckhqdq of xmm0 and xmm1.
438 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
439 ; SSE-LABEL: shuffle_v2i64_13:
441 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
444 ; AVX-LABEL: shuffle_v2i64_13:
446 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
448 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
449 ret <2 x i64> %shuffle
; Same <1, 3> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2; the non-AVX form copies the result
; into xmm0, while the AVX form writes xmm0 directly.
451 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
452 ; SSE-LABEL: shuffle_v2i64_13_copy:
454 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
455 ; SSE-NEXT: movdqa %xmm1, %xmm0
458 ; AVX-LABEL: shuffle_v2i64_13_copy:
460 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
462 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
463 ret <2 x i64> %shuffle
; Mask <2, 0> pairs element 0 of %b with element 0 of %a (operands commuted relative
; to mask <0, 2>). The non-AVX expectation unpacks into xmm1 and copies to xmm0; the
; AVX expectation is one vpunpcklqdq.
465 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
466 ; SSE-LABEL: shuffle_v2i64_20:
468 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
469 ; SSE-NEXT: movdqa %xmm1, %xmm0
472 ; AVX-LABEL: shuffle_v2i64_20:
474 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
476 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
477 ret <2 x i64> %shuffle
; Same <2, 0> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2 and the result is delivered in xmm0.
479 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
480 ; SSE-LABEL: shuffle_v2i64_20_copy:
482 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
483 ; SSE-NEXT: movdqa %xmm2, %xmm0
486 ; AVX-LABEL: shuffle_v2i64_20_copy:
488 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
490 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
491 ret <2 x i64> %shuffle
; Mask <2, 1> blends lane 0 from element 0 of %b with lane 1 from element 1 of %a.
; Below SSE4.1 a single movsd is expected; the SSE4.1 run expects pblendw plus a copy,
; the AVX1 run vpblendw and the AVX2 run vpblendd.
493 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
494 ; SSE2-LABEL: shuffle_v2i64_21:
496 ; SSE2-NEXT: movsd %xmm1, %xmm0
499 ; SSE3-LABEL: shuffle_v2i64_21:
501 ; SSE3-NEXT: movsd %xmm1, %xmm0
504 ; SSSE3-LABEL: shuffle_v2i64_21:
506 ; SSSE3-NEXT: movsd %xmm1, %xmm0
509 ; SSE41-LABEL: shuffle_v2i64_21:
511 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
512 ; SSE41-NEXT: movdqa %xmm1, %xmm0
515 ; AVX1-LABEL: shuffle_v2i64_21:
517 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
520 ; AVX2-LABEL: shuffle_v2i64_21:
522 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
524 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
525 ret <2 x i64> %shuffle
; Same <2, 1> blend, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2; every non-AVX run ends with a copy
; of the blended register into xmm0.
527 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
528 ; SSE2-LABEL: shuffle_v2i64_21_copy:
530 ; SSE2-NEXT: movsd %xmm2, %xmm1
531 ; SSE2-NEXT: movaps %xmm1, %xmm0
534 ; SSE3-LABEL: shuffle_v2i64_21_copy:
536 ; SSE3-NEXT: movsd %xmm2, %xmm1
537 ; SSE3-NEXT: movaps %xmm1, %xmm0
540 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
542 ; SSSE3-NEXT: movsd %xmm2, %xmm1
543 ; SSSE3-NEXT: movaps %xmm1, %xmm0
546 ; SSE41-LABEL: shuffle_v2i64_21_copy:
548 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
549 ; SSE41-NEXT: movdqa %xmm2, %xmm0
552 ; AVX1-LABEL: shuffle_v2i64_21_copy:
554 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
557 ; AVX2-LABEL: shuffle_v2i64_21_copy:
559 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
561 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
562 ret <2 x i64> %shuffle
; Mask <3, 0> takes element 1 of %b followed by element 0 of %a.
; The baseline and +sse3 runs expect shufpd on xmm1 plus a copy; from SSSE3 upward a
; single (v)palignr writing xmm0 is expected.
564 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
565 ; SSE2-LABEL: shuffle_v2i64_30:
567 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
568 ; SSE2-NEXT: movapd %xmm1, %xmm0
571 ; SSE3-LABEL: shuffle_v2i64_30:
573 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
574 ; SSE3-NEXT: movapd %xmm1, %xmm0
577 ; SSSE3-LABEL: shuffle_v2i64_30:
579 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
582 ; SSE41-LABEL: shuffle_v2i64_30:
584 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
587 ; AVX-LABEL: shuffle_v2i64_30:
589 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
591 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
592 ret <2 x i64> %shuffle
; Same <3, 0> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2; every non-AVX run ends with a copy
; of the shuffled register into xmm0.
594 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
595 ; SSE2-LABEL: shuffle_v2i64_30_copy:
597 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
598 ; SSE2-NEXT: movapd %xmm2, %xmm0
601 ; SSE3-LABEL: shuffle_v2i64_30_copy:
603 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
604 ; SSE3-NEXT: movapd %xmm2, %xmm0
607 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
609 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
610 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
613 ; SSE41-LABEL: shuffle_v2i64_30_copy:
615 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
616 ; SSE41-NEXT: movdqa %xmm1, %xmm0
619 ; AVX-LABEL: shuffle_v2i64_30_copy:
621 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
623 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
624 ret <2 x i64> %shuffle
; Mask <3, 1> pairs element 1 of %b with element 1 of %a (operands commuted relative
; to mask <1, 3>). The non-AVX expectation unpacks into xmm1 and copies to xmm0; the
; AVX expectation is one vpunpckhqdq.
626 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
627 ; SSE-LABEL: shuffle_v2i64_31:
629 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
630 ; SSE-NEXT: movdqa %xmm1, %xmm0
633 ; AVX-LABEL: shuffle_v2i64_31:
635 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
637 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
638 ret <2 x i64> %shuffle
; Same <3, 1> shuffle, but with a leading %nonce argument that the body never uses.
; In the expectations the inputs are xmm1/xmm2 and the result is delivered in xmm0.
640 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
641 ; SSE-LABEL: shuffle_v2i64_31_copy:
643 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
644 ; SSE-NEXT: movdqa %xmm2, %xmm0
647 ; AVX-LABEL: shuffle_v2i64_31_copy:
649 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
651 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
652 ret <2 x i64> %shuffle
; Shuffles %a with zeroinitializer using mask <0, 3>: lane 0 keeps element 0 of %a and
; lane 1 becomes zero. All runs expect a register-to-register (v)movq on xmm0.
655 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
656 ; SSE-LABEL: shuffle_v2i64_0z:
658 ; SSE-NEXT: movq %xmm0, %xmm0
661 ; AVX-LABEL: shuffle_v2i64_0z:
663 ; AVX-NEXT: vmovq %xmm0, %xmm0
665 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
666 ret <2 x i64> %shuffle
; Shuffles %a with zeroinitializer using mask <1, 3>: lane 0 takes element 1 of %a and
; lane 1 becomes zero. The expectation zeroes xmm1 and then uses (v)punpckhqdq.
669 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
670 ; SSE-LABEL: shuffle_v2i64_1z:
672 ; SSE-NEXT: pxor %xmm1, %xmm1
673 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
676 ; AVX-LABEL: shuffle_v2i64_1z:
678 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
679 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
681 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
682 ret <2 x i64> %shuffle
; Shuffles %a with zeroinitializer using mask <2, 0>: lane 0 becomes zero and lane 1
; takes element 0 of %a. The expectation is a (v)movq that clears the upper lane,
; followed by a (v)pshufd that swaps the two lanes.
685 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
686 ; SSE-LABEL: shuffle_v2i64_z0:
688 ; SSE-NEXT: movq %xmm0, %xmm0
689 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
692 ; AVX-LABEL: shuffle_v2i64_z0:
694 ; AVX-NEXT: vmovq %xmm0, %xmm0
695 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
697 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
698 ret <2 x i64> %shuffle
; Shuffles %a with zeroinitializer using mask <2, 1>: lane 0 becomes zero and lane 1
; keeps element 1 of %a. Every expectation first zeroes xmm1; below SSE4.1 it then uses
; movsd, while the SSE4.1 and AVX runs blend (pblendw / vpblendw / vpblendd).
701 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
702 ; SSE2-LABEL: shuffle_v2i64_z1:
704 ; SSE2-NEXT: xorps %xmm1, %xmm1
705 ; SSE2-NEXT: movsd %xmm1, %xmm0
708 ; SSE3-LABEL: shuffle_v2i64_z1:
710 ; SSE3-NEXT: xorps %xmm1, %xmm1
711 ; SSE3-NEXT: movsd %xmm1, %xmm0
714 ; SSSE3-LABEL: shuffle_v2i64_z1:
716 ; SSSE3-NEXT: xorps %xmm1, %xmm1
717 ; SSSE3-NEXT: movsd %xmm1, %xmm0
720 ; SSE41-LABEL: shuffle_v2i64_z1:
722 ; SSE41-NEXT: pxor %xmm1, %xmm1
723 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
724 ; SSE41-NEXT: movdqa %xmm1, %xmm0
727 ; AVX1-LABEL: shuffle_v2i64_z1:
729 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
730 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
733 ; AVX2-LABEL: shuffle_v2i64_z1:
735 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
736 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
738 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
739 ret <2 x i64> %shuffle
; Floating-point variant of mask <0, 3> against zeroinitializer: lane 0 keeps element 0
; of %a and lane 1 becomes zero. All runs expect a register-to-register (v)movq.
742 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
743 ; SSE-LABEL: shuffle_v2f64_0z:
745 ; SSE-NEXT: movq %xmm0, %xmm0
748 ; AVX-LABEL: shuffle_v2f64_0z:
750 ; AVX-NEXT: vmovq %xmm0, %xmm0
752 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
753 ret <2 x double> %shuffle
; Mask <1, 3> against zeroinitializer: lane 0 takes element 1 of %a and lane 1 becomes
; zero. The expectation zeroes xmm1 with (v)xorpd and then uses (v)unpckhpd.
756 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
757 ; SSE-LABEL: shuffle_v2f64_1z:
759 ; SSE-NEXT: xorpd %xmm1, %xmm1
760 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
763 ; AVX-LABEL: shuffle_v2f64_1z:
765 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
766 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
768 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
769 ret <2 x double> %shuffle
; Mask <2, 0> against zeroinitializer: lane 0 becomes zero and lane 1 takes element 0
; of %a. The expectation zeroes xmm1 and uses (v)unpcklpd with the zero register as the
; low source; the non-AVX form then copies xmm1 to xmm0.
772 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
773 ; SSE-LABEL: shuffle_v2f64_z0:
775 ; SSE-NEXT: xorpd %xmm1, %xmm1
776 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
777 ; SSE-NEXT: movapd %xmm1, %xmm0
780 ; AVX-LABEL: shuffle_v2f64_z0:
782 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
783 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
785 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
786 ret <2 x double> %shuffle
; Mask <2, 1> against zeroinitializer: lane 0 becomes zero and lane 1 keeps element 1
; of %a. Every expectation first zeroes xmm1; below SSE4.1 it then uses movsd, while
; the SSE4.1 and AVX runs use (v)blendpd.
789 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
790 ; SSE2-LABEL: shuffle_v2f64_z1:
792 ; SSE2-NEXT: xorps %xmm1, %xmm1
793 ; SSE2-NEXT: movsd %xmm1, %xmm0
796 ; SSE3-LABEL: shuffle_v2f64_z1:
798 ; SSE3-NEXT: xorps %xmm1, %xmm1
799 ; SSE3-NEXT: movsd %xmm1, %xmm0
802 ; SSSE3-LABEL: shuffle_v2f64_z1:
804 ; SSSE3-NEXT: xorps %xmm1, %xmm1
805 ; SSSE3-NEXT: movsd %xmm1, %xmm0
808 ; SSE41-LABEL: shuffle_v2f64_z1:
810 ; SSE41-NEXT: xorpd %xmm1, %xmm1
811 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
812 ; SSE41-NEXT: movapd %xmm1, %xmm0
815 ; AVX-LABEL: shuffle_v2f64_z1:
817 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
818 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
820 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
821 ret <2 x double> %shuffle
; Inserts the scalar argument %a into lane 0 of an undef vector, then shuffles with
; zeroinitializer using mask <0, 3> so that lane 1 is zero.
; The expectation is a single move from rdi into xmm0 (movd without AVX, vmovq with).
824 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
825 ; SSE-LABEL: insert_reg_and_zero_v2i64:
827 ; SSE-NEXT: movd %rdi, %xmm0
830 ; AVX-LABEL: insert_reg_and_zero_v2i64:
832 ; AVX-NEXT: vmovq %rdi, %xmm0
834 %v = insertelement <2 x i64> undef, i64 %a, i32 0
835 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
836 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then shuffles with
; zeroinitializer using mask <0, 3> so that lane 1 is zero.
; The expectation is a single (v)movq from (%rdi) into xmm0.
; The load below defines %a; without it the insertelement refers to an undefined value
; and %ptr is unused, which does not match the memory operand in the expectations.
839 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
840 ; SSE-LABEL: insert_mem_and_zero_v2i64:
842 ; SSE-NEXT: movq (%rdi), %xmm0
845 ; AVX-LABEL: insert_mem_and_zero_v2i64:
847 ; AVX-NEXT: vmovq (%rdi), %xmm0
849 %a = load i64* %ptr
850 %v = insertelement <2 x i64> undef, i64 %a, i32 0
851 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
852 ret <2 x i64> %shuffle
; Inserts the scalar double %a into lane 0 of an undef vector, then shuffles with
; zeroinitializer using mask <0, 3> so that lane 1 is zero.
; The expectation is a register-to-register (v)movq on xmm0.
855 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
856 ; SSE-LABEL: insert_reg_and_zero_v2f64:
858 ; SSE-NEXT: movq %xmm0, %xmm0
861 ; AVX-LABEL: insert_reg_and_zero_v2f64:
863 ; AVX-NEXT: vmovq %xmm0, %xmm0
865 %v = insertelement <2 x double> undef, double %a, i32 0
866 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
867 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then shuffles
; with zeroinitializer using mask <0, 3> so that lane 1 is zero.
; The expectation is a single (v)movsd load from (%rdi).
870 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
871 ; SSE-LABEL: insert_mem_and_zero_v2f64:
873 ; SSE-NEXT: movsd (%rdi), %xmm0
876 ; AVX-LABEL: insert_mem_and_zero_v2f64:
878 ; AVX-NEXT: vmovsd (%rdi), %xmm0
880 %a = load double* %ptr
881 %v = insertelement <2 x double> undef, double %a, i32 0
882 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
883 ret <2 x double> %shuffle
; Inserts the scalar %a into lane 0 of an undef vector and blends it with %b using mask
; <0, 3>: lane 0 is the scalar, lane 1 is element 1 of %b.
; Every expectation first moves rdi into xmm1; below SSE4.1 it then uses movsd, while
; the SSE4.1 and AVX runs blend (pblendw / vpblendw / vpblendd).
886 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
887 ; SSE2-LABEL: insert_reg_lo_v2i64:
889 ; SSE2-NEXT: movd %rdi, %xmm1
890 ; SSE2-NEXT: movsd %xmm1, %xmm0
893 ; SSE3-LABEL: insert_reg_lo_v2i64:
895 ; SSE3-NEXT: movd %rdi, %xmm1
896 ; SSE3-NEXT: movsd %xmm1, %xmm0
899 ; SSSE3-LABEL: insert_reg_lo_v2i64:
901 ; SSSE3-NEXT: movd %rdi, %xmm1
902 ; SSSE3-NEXT: movsd %xmm1, %xmm0
905 ; SSE41-LABEL: insert_reg_lo_v2i64:
907 ; SSE41-NEXT: movd %rdi, %xmm1
908 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
909 ; SSE41-NEXT: movdqa %xmm1, %xmm0
912 ; AVX1-LABEL: insert_reg_lo_v2i64:
914 ; AVX1-NEXT: vmovq %rdi, %xmm1
915 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
918 ; AVX2-LABEL: insert_reg_lo_v2i64:
920 ; AVX2-NEXT: vmovq %rdi, %xmm1
921 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
923 %v = insertelement <2 x i64> undef, i64 %a, i32 0
924 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
925 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector and blends it with
; %b using mask <0, 3>: lane 0 is the loaded scalar, lane 1 is element 1 of %b.
; Below SSE4.1 the expectation is a single movlpd from (%rdi); the SSE4.1 and AVX runs
; load with (v)movq into xmm1 and then blend.
; The load below defines %a; without it the insertelement refers to an undefined value
; and %ptr is unused, which does not match the memory operands in the expectations.
928 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
929 ; SSE2-LABEL: insert_mem_lo_v2i64:
931 ; SSE2-NEXT: movlpd (%rdi), %xmm0
934 ; SSE3-LABEL: insert_mem_lo_v2i64:
936 ; SSE3-NEXT: movlpd (%rdi), %xmm0
939 ; SSSE3-LABEL: insert_mem_lo_v2i64:
941 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
944 ; SSE41-LABEL: insert_mem_lo_v2i64:
946 ; SSE41-NEXT: movq (%rdi), %xmm1
947 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
948 ; SSE41-NEXT: movdqa %xmm1, %xmm0
951 ; AVX1-LABEL: insert_mem_lo_v2i64:
953 ; AVX1-NEXT: vmovq (%rdi), %xmm1
954 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
957 ; AVX2-LABEL: insert_mem_lo_v2i64:
959 ; AVX2-NEXT: vmovq (%rdi), %xmm1
960 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
962 %a = load i64* %ptr
963 %v = insertelement <2 x i64> undef, i64 %a, i32 0
964 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
965 ret <2 x i64> %shuffle
; Inserts the scalar %a into lane 0 of an undef vector and combines it with %b using
; mask <2, 0>: lane 0 is element 0 of %b, lane 1 is the scalar.
; The expectation moves rdi into xmm1 and then uses (v)punpcklqdq.
968 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
969 ; SSE-LABEL: insert_reg_hi_v2i64:
971 ; SSE-NEXT: movd %rdi, %xmm1
972 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
975 ; AVX-LABEL: insert_reg_hi_v2i64:
977 ; AVX-NEXT: vmovq %rdi, %xmm1
978 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
980 %v = insertelement <2 x i64> undef, i64 %a, i32 0
981 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
982 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector and combines it
; with %b using mask <2, 0>: lane 0 is element 0 of %b, lane 1 is the loaded scalar.
; The expectation loads with (v)movq from (%rdi) into xmm1 and then uses (v)punpcklqdq.
; The load below defines %a; without it the insertelement refers to an undefined value
; and %ptr is unused, which does not match the memory operands in the expectations.
985 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
986 ; SSE-LABEL: insert_mem_hi_v2i64:
988 ; SSE-NEXT: movq (%rdi), %xmm1
989 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
992 ; AVX-LABEL: insert_mem_hi_v2i64:
994 ; AVX-NEXT: vmovq (%rdi), %xmm1
995 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
997 %a = load i64* %ptr
998 %v = insertelement <2 x i64> undef, i64 %a, i32 0
999 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1000 ret <2 x i64> %shuffle
; Inserts the scalar double %a into lane 0 of an undef vector and blends it with %b
; using mask <0, 3>: lane 0 is the scalar, lane 1 is element 1 of %b.
; Without AVX the expectation is movsd into xmm1 plus a copy; with AVX it is one
; three-operand vmovsd.
1003 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1004 ; SSE-LABEL: insert_reg_lo_v2f64:
1006 ; SSE-NEXT: movsd %xmm0, %xmm1
1007 ; SSE-NEXT: movaps %xmm1, %xmm0
1010 ; AVX-LABEL: insert_reg_lo_v2f64:
1012 ; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1014 %v = insertelement <2 x double> undef, double %a, i32 0
1015 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1016 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and blends it
; with %b using mask <0, 3>: lane 0 is the loaded scalar, lane 1 is element 1 of %b.
; The expectation is a single (v)movlpd from (%rdi).
1019 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1020 ; SSE-LABEL: insert_mem_lo_v2f64:
1022 ; SSE-NEXT: movlpd (%rdi), %xmm0
1025 ; AVX-LABEL: insert_mem_lo_v2f64:
1027 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1029 %a = load double* %ptr
1030 %v = insertelement <2 x double> undef, double %a, i32 0
1031 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1032 ret <2 x double> %shuffle
; Inserts the scalar double %a into lane 0 of an undef vector and combines it with %b
; using mask <2, 0>: lane 0 is element 0 of %b, lane 1 is the scalar.
; Without AVX the expectation is unpcklpd into xmm1 plus a copy; with AVX it is one
; vunpcklpd.
1035 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1036 ; SSE-LABEL: insert_reg_hi_v2f64:
1038 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1039 ; SSE-NEXT: movapd %xmm1, %xmm0
1042 ; AVX-LABEL: insert_reg_hi_v2f64:
1044 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1046 %v = insertelement <2 x double> undef, double %a, i32 0
1047 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1048 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and combines it
; with %b using mask <2, 0>: lane 0 is element 0 of %b, lane 1 is the loaded scalar.
; The expectation is a single (v)movhpd from (%rdi).
1051 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1052 ; SSE-LABEL: insert_mem_hi_v2f64:
1054 ; SSE-NEXT: movhpd (%rdi), %xmm0
1057 ; AVX-LABEL: insert_mem_hi_v2f64:
1059 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1061 %a = load double* %ptr
1062 %v = insertelement <2 x double> undef, double %a, i32 0
1063 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1064 ret <2 x double> %shuffle
; Inserts the scalar double %a into lane 0 of an undef vector and splats it to both
; lanes with mask <0, 0>. The current expectations are movlhps on the baseline run and
; (v)unpcklpd on every other run; the FIXME below records that movddup would be the
; preferred match from SSE3 upward.
1067 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1068 ; FIXME: We should match movddup for SSE3 and higher here.
1070 ; SSE2-LABEL: insert_dup_reg_v2f64:
1072 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1075 ; SSE3-LABEL: insert_dup_reg_v2f64:
1077 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1080 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1082 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1085 ; SSE41-LABEL: insert_dup_reg_v2f64:
1087 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1090 ; AVX-LABEL: insert_dup_reg_v2f64:
1092 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1094 %v = insertelement <2 x double> undef, double %a, i32 0
1095 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1096 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and splats it to
; both lanes with mask <0, 0>. The baseline run expects a movsd load followed by
; movlhps; every other run expects a single (v)movddup from (%rdi).
1098 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1099 ; SSE2-LABEL: insert_dup_mem_v2f64:
1101 ; SSE2-NEXT: movsd (%rdi), %xmm0
1102 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1105 ; SSE3-LABEL: insert_dup_mem_v2f64:
1107 ; SSE3-NEXT: movddup (%rdi), %xmm0
1110 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1112 ; SSSE3-NEXT: movddup (%rdi), %xmm0
1115 ; SSE41-LABEL: insert_dup_mem_v2f64:
1117 ; SSE41-NEXT: movddup (%rdi), %xmm0
1120 ; AVX-LABEL: insert_dup_mem_v2f64:
1122 ; AVX-NEXT: vmovddup (%rdi), %xmm0
1124 %a = load double* %ptr
1125 %v = insertelement <2 x double> undef, double %a, i32 0
1126 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1127 ret <2 x double> %shuffle
; Loads a whole <2 x double> from %ptr and swaps its lanes with mask <1, 0>.
; Without AVX the expectation is a separate movapd load followed by shufpd; with AVX
; the load is folded into vpermilpd as a memory operand.
1130 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1131 ; SSE-LABEL: shuffle_mem_v2f64_10:
1133 ; SSE-NEXT: movapd (%rdi), %xmm0
1134 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1137 ; AVX-LABEL: shuffle_mem_v2f64_10:
1139 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1141 %a = load <2 x double>* %ptr
1142 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1143 ret <2 x double> %shuffle