1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Mask <0,0> broadcasts element 0 of %a into both lanes; %b is unused.
; The AVX2 run expects vpbroadcastq, the other runs a pshufd/vpshufd.
11 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
12 ; SSE-LABEL: shuffle_v2i64_00:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17 ; AVX1-LABEL: shuffle_v2i64_00:
19 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22 ; AVX2-LABEL: shuffle_v2i64_00:
24 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
26 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
27 ret <2 x i64> %shuffle
; Mask <1,0> swaps the two elements of %a; %b is unused.
29 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
30 ; SSE-LABEL: shuffle_v2i64_10:
32 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
35 ; AVX-LABEL: shuffle_v2i64_10:
37 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
40 ret <2 x i64> %shuffle
; Mask <1,1> broadcasts element 1 of %a into both lanes; %b is unused.
42 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
43 ; SSE-LABEL: shuffle_v2i64_11:
45 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
48 ; AVX-LABEL: shuffle_v2i64_11:
50 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
53 ret <2 x i64> %shuffle
; Mask <2,2> broadcasts element 0 of %b (second operand) into both lanes;
; %a is unused. The checks expect the source to be read from xmm1.
55 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
56 ; SSE-LABEL: shuffle_v2i64_22:
58 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
61 ; AVX1-LABEL: shuffle_v2i64_22:
63 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
66 ; AVX2-LABEL: shuffle_v2i64_22:
68 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
70 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
71 ret <2 x i64> %shuffle
; Mask <3,2> swaps the two elements of %b; %a is unused.
73 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
74 ; SSE-LABEL: shuffle_v2i64_32:
76 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
79 ; AVX-LABEL: shuffle_v2i64_32:
81 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
83 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
84 ret <2 x i64> %shuffle
; Mask <3,3> broadcasts element 1 of %b into both lanes; %a is unused.
86 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
87 ; SSE-LABEL: shuffle_v2i64_33:
89 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
92 ; AVX-LABEL: shuffle_v2i64_33:
94 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
96 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
97 ret <2 x i64> %shuffle
; Floating-point variant: mask <0,0> broadcasts element 0 of %a; %b is unused.
; The SSE2 run expects movlhps, while SSE3 and later expect unpcklpd.
100 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
101 ; SSE2-LABEL: shuffle_v2f64_00:
103 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
106 ; SSE3-LABEL: shuffle_v2f64_00:
108 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
111 ; SSSE3-LABEL: shuffle_v2f64_00:
113 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
116 ; SSE41-LABEL: shuffle_v2f64_00:
118 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
121 ; AVX-LABEL: shuffle_v2f64_00:
123 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
125 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
126 ret <2 x double> %shuffle
; Mask <1,0> swaps the two doubles of %a; %b is unused.
; The SSE runs expect shufpd, the AVX runs vpermilpd.
128 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
129 ; SSE-LABEL: shuffle_v2f64_10:
131 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
134 ; AVX-LABEL: shuffle_v2f64_10:
136 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
138 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
139 ret <2 x double> %shuffle
; Mask <1,1> broadcasts element 1 of %a into both lanes; %b is unused.
141 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
142 ; SSE-LABEL: shuffle_v2f64_11:
144 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
147 ; AVX-LABEL: shuffle_v2f64_11:
149 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
152 ret <2 x double> %shuffle
; Mask <2,2> broadcasts element 0 of %b; %a is unused.
; The SSE runs expect an in-place shuffle of xmm1 followed by a copy into
; xmm0; the AVX runs expect a single three-operand vunpcklpd.
154 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
155 ; SSE2-LABEL: shuffle_v2f64_22:
157 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
158 ; SSE2-NEXT: movaps %xmm1, %xmm0
161 ; SSE3-LABEL: shuffle_v2f64_22:
163 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
164 ; SSE3-NEXT: movapd %xmm1, %xmm0
167 ; SSSE3-LABEL: shuffle_v2f64_22:
169 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
170 ; SSSE3-NEXT: movapd %xmm1, %xmm0
173 ; SSE41-LABEL: shuffle_v2f64_22:
175 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
176 ; SSE41-NEXT: movapd %xmm1, %xmm0
179 ; AVX-LABEL: shuffle_v2f64_22:
181 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
184 ret <2 x double> %shuffle
; Mask <3,2> swaps the two doubles of %b; %a is unused.
; Note the SSE check expects the integer pshufd here (writing xmm0 directly
; from xmm1), whereas the AVX check expects vpermilpd.
186 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
187 ; SSE-LABEL: shuffle_v2f64_32:
189 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
192 ; AVX-LABEL: shuffle_v2f64_32:
194 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
196 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
197 ret <2 x double> %shuffle
; Mask <3,3> broadcasts element 1 of %b into both lanes; %a is unused.
; The SSE checks expect an in-place movhlps on xmm1 plus a copy into xmm0.
199 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
200 ; SSE-LABEL: shuffle_v2f64_33:
202 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
203 ; SSE-NEXT: movaps %xmm1, %xmm0
206 ; AVX-LABEL: shuffle_v2f64_33:
208 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
210 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
211 ret <2 x double> %shuffle
; Mask <0,3> blends element 0 of %a with element 1 of %b.
; The checks expect a movsd/vmovsd of the low element.
213 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
214 ; SSE-LABEL: shuffle_v2f64_03:
216 ; SSE-NEXT: movsd %xmm0, %xmm1
217 ; SSE-NEXT: movaps %xmm1, %xmm0
220 ; AVX-LABEL: shuffle_v2f64_03:
222 ; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
224 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
225 ret <2 x double> %shuffle
; Mask <2,1> blends element 0 of %b with element 1 of %a.
227 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
228 ; SSE-LABEL: shuffle_v2f64_21:
230 ; SSE-NEXT: movsd %xmm1, %xmm0
233 ; AVX-LABEL: shuffle_v2f64_21:
235 ; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
237 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
238 ret <2 x double> %shuffle
; Mask <0,2> interleaves the low elements: result is (%a[0], %b[0]).
; The checks expect punpcklqdq/vpunpcklqdq.
242 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
243 ; SSE-LABEL: shuffle_v2i64_02:
245 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
248 ; AVX-LABEL: shuffle_v2i64_02:
250 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
252 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
253 ret <2 x i64> %shuffle
; Same <0,2> shuffle as shuffle_v2i64_02, but with an unused leading %nonce
; argument: the checks expect the inputs in xmm1/xmm2 and, for the SSE runs,
; an extra movdqa to place the result in xmm0.
255 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
256 ; SSE-LABEL: shuffle_v2i64_02_copy:
258 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
259 ; SSE-NEXT: movdqa %xmm1, %xmm0
262 ; AVX-LABEL: shuffle_v2i64_02_copy:
264 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
266 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
267 ret <2 x i64> %shuffle
; Mask <0,3> blends element 0 of %a with element 1 of %b.
; Runs before SSE4.1 expect movsd plus a copy; the SSE4.1 and AVX1 runs
; expect a word blend, and the AVX2 run a dword blend.
269 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
270 ; SSE2-LABEL: shuffle_v2i64_03:
272 ; SSE2-NEXT: movsd %xmm0, %xmm1
273 ; SSE2-NEXT: movaps %xmm1, %xmm0
276 ; SSE3-LABEL: shuffle_v2i64_03:
278 ; SSE3-NEXT: movsd %xmm0, %xmm1
279 ; SSE3-NEXT: movaps %xmm1, %xmm0
282 ; SSSE3-LABEL: shuffle_v2i64_03:
284 ; SSSE3-NEXT: movsd %xmm0, %xmm1
285 ; SSSE3-NEXT: movaps %xmm1, %xmm0
288 ; SSE41-LABEL: shuffle_v2i64_03:
290 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
293 ; AVX1-LABEL: shuffle_v2i64_03:
295 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
298 ; AVX2-LABEL: shuffle_v2i64_03:
300 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
302 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
303 ret <2 x i64> %shuffle
; Same <0,3> blend as shuffle_v2i64_03, with an unused leading %nonce
; argument so the checks expect the inputs in xmm1/xmm2 and the result
; delivered in xmm0.
305 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
306 ; SSE2-LABEL: shuffle_v2i64_03_copy:
308 ; SSE2-NEXT: movsd %xmm1, %xmm2
309 ; SSE2-NEXT: movaps %xmm2, %xmm0
312 ; SSE3-LABEL: shuffle_v2i64_03_copy:
314 ; SSE3-NEXT: movsd %xmm1, %xmm2
315 ; SSE3-NEXT: movaps %xmm2, %xmm0
318 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
320 ; SSSE3-NEXT: movsd %xmm1, %xmm2
321 ; SSSE3-NEXT: movaps %xmm2, %xmm0
324 ; SSE41-LABEL: shuffle_v2i64_03_copy:
326 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
327 ; SSE41-NEXT: movdqa %xmm1, %xmm0
330 ; AVX1-LABEL: shuffle_v2i64_03_copy:
332 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
335 ; AVX2-LABEL: shuffle_v2i64_03_copy:
337 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
339 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
340 ret <2 x i64> %shuffle
; Mask <1,2> yields (%a[1], %b[0]).
; The SSE2 and SSE3 runs expect shufpd; SSSE3 and later expect palignr.
342 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
343 ; SSE2-LABEL: shuffle_v2i64_12:
345 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
348 ; SSE3-LABEL: shuffle_v2i64_12:
350 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
353 ; SSSE3-LABEL: shuffle_v2i64_12:
355 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
356 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
359 ; SSE41-LABEL: shuffle_v2i64_12:
361 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
362 ; SSE41-NEXT: movdqa %xmm1, %xmm0
365 ; AVX-LABEL: shuffle_v2i64_12:
367 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
369 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
370 ret <2 x i64> %shuffle
; Same <1,2> shuffle as shuffle_v2i64_12, with an unused leading %nonce
; argument; every SSE run is expected to need a final copy into xmm0.
372 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
373 ; SSE2-LABEL: shuffle_v2i64_12_copy:
375 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
376 ; SSE2-NEXT: movapd %xmm1, %xmm0
379 ; SSE3-LABEL: shuffle_v2i64_12_copy:
381 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
382 ; SSE3-NEXT: movapd %xmm1, %xmm0
385 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
387 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
388 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
391 ; SSE41-LABEL: shuffle_v2i64_12_copy:
393 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
394 ; SSE41-NEXT: movdqa %xmm2, %xmm0
397 ; AVX-LABEL: shuffle_v2i64_12_copy:
399 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
401 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
402 ret <2 x i64> %shuffle
; Mask <1,3> interleaves the high elements: result is (%a[1], %b[1]).
404 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
405 ; SSE-LABEL: shuffle_v2i64_13:
407 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
410 ; AVX-LABEL: shuffle_v2i64_13:
412 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
414 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
415 ret <2 x i64> %shuffle
; Same <1,3> shuffle as shuffle_v2i64_13, with an unused leading %nonce
; argument; the SSE checks expect an extra movdqa into xmm0.
417 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
418 ; SSE-LABEL: shuffle_v2i64_13_copy:
420 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
421 ; SSE-NEXT: movdqa %xmm1, %xmm0
424 ; AVX-LABEL: shuffle_v2i64_13_copy:
426 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
428 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
429 ret <2 x i64> %shuffle
; Mask <2,0> yields (%b[0], %a[0]), the commuted form of the <0,2> unpack.
431 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
432 ; SSE-LABEL: shuffle_v2i64_20:
434 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
435 ; SSE-NEXT: movdqa %xmm1, %xmm0
438 ; AVX-LABEL: shuffle_v2i64_20:
440 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
442 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
443 ret <2 x i64> %shuffle
; Same <2,0> shuffle as shuffle_v2i64_20, with an unused leading %nonce
; argument so the checks expect the inputs in xmm1/xmm2.
445 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
446 ; SSE-LABEL: shuffle_v2i64_20_copy:
448 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
449 ; SSE-NEXT: movdqa %xmm2, %xmm0
452 ; AVX-LABEL: shuffle_v2i64_20_copy:
454 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
456 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
457 ret <2 x i64> %shuffle
; Mask <2,1> blends element 0 of %b with element 1 of %a.
; Runs before SSE4.1 expect a single movsd; later runs expect blends.
459 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
460 ; SSE2-LABEL: shuffle_v2i64_21:
462 ; SSE2-NEXT: movsd %xmm1, %xmm0
465 ; SSE3-LABEL: shuffle_v2i64_21:
467 ; SSE3-NEXT: movsd %xmm1, %xmm0
470 ; SSSE3-LABEL: shuffle_v2i64_21:
472 ; SSSE3-NEXT: movsd %xmm1, %xmm0
475 ; SSE41-LABEL: shuffle_v2i64_21:
477 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
478 ; SSE41-NEXT: movdqa %xmm1, %xmm0
481 ; AVX1-LABEL: shuffle_v2i64_21:
483 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
486 ; AVX2-LABEL: shuffle_v2i64_21:
488 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
490 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
491 ret <2 x i64> %shuffle
; Same <2,1> blend as shuffle_v2i64_21, with an unused leading %nonce
; argument; every SSE run is expected to need a final copy into xmm0.
493 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
494 ; SSE2-LABEL: shuffle_v2i64_21_copy:
496 ; SSE2-NEXT: movsd %xmm2, %xmm1
497 ; SSE2-NEXT: movaps %xmm1, %xmm0
500 ; SSE3-LABEL: shuffle_v2i64_21_copy:
502 ; SSE3-NEXT: movsd %xmm2, %xmm1
503 ; SSE3-NEXT: movaps %xmm1, %xmm0
506 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
508 ; SSSE3-NEXT: movsd %xmm2, %xmm1
509 ; SSSE3-NEXT: movaps %xmm1, %xmm0
512 ; SSE41-LABEL: shuffle_v2i64_21_copy:
514 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
515 ; SSE41-NEXT: movdqa %xmm2, %xmm0
518 ; AVX1-LABEL: shuffle_v2i64_21_copy:
520 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
523 ; AVX2-LABEL: shuffle_v2i64_21_copy:
525 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
527 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
528 ret <2 x i64> %shuffle
; Mask <3,0> yields (%b[1], %a[0]).
; The SSE2 and SSE3 runs expect shufpd plus a copy; SSSE3 and later expect
; a single palignr writing xmm0.
530 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
531 ; SSE2-LABEL: shuffle_v2i64_30:
533 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
534 ; SSE2-NEXT: movapd %xmm1, %xmm0
537 ; SSE3-LABEL: shuffle_v2i64_30:
539 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
540 ; SSE3-NEXT: movapd %xmm1, %xmm0
543 ; SSSE3-LABEL: shuffle_v2i64_30:
545 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
548 ; SSE41-LABEL: shuffle_v2i64_30:
550 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
553 ; AVX-LABEL: shuffle_v2i64_30:
555 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
557 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
558 ret <2 x i64> %shuffle
; Same <3,0> shuffle as shuffle_v2i64_30, with an unused leading %nonce
; argument; every SSE run is expected to need a final copy into xmm0.
560 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
561 ; SSE2-LABEL: shuffle_v2i64_30_copy:
563 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
564 ; SSE2-NEXT: movapd %xmm2, %xmm0
567 ; SSE3-LABEL: shuffle_v2i64_30_copy:
569 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
570 ; SSE3-NEXT: movapd %xmm2, %xmm0
573 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
575 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
576 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
579 ; SSE41-LABEL: shuffle_v2i64_30_copy:
581 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
582 ; SSE41-NEXT: movdqa %xmm1, %xmm0
585 ; AVX-LABEL: shuffle_v2i64_30_copy:
587 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
589 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
590 ret <2 x i64> %shuffle
; Mask <3,1> yields (%b[1], %a[1]), the commuted form of the <1,3> unpack.
592 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
593 ; SSE-LABEL: shuffle_v2i64_31:
595 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
596 ; SSE-NEXT: movdqa %xmm1, %xmm0
599 ; AVX-LABEL: shuffle_v2i64_31:
601 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
603 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
604 ret <2 x i64> %shuffle
; Same <3,1> shuffle as shuffle_v2i64_31, with an unused leading %nonce
; argument so the checks expect the inputs in xmm1/xmm2.
606 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
607 ; SSE-LABEL: shuffle_v2i64_31_copy:
609 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
610 ; SSE-NEXT: movdqa %xmm2, %xmm0
613 ; AVX-LABEL: shuffle_v2i64_31_copy:
615 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
617 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
618 ret <2 x i64> %shuffle
; Shuffle against a zero vector: mask <0,3> keeps %a[0] and zeroes lane 1.
; The checks expect a single movq/vmovq.
621 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
622 ; SSE-LABEL: shuffle_v2i64_0z:
624 ; SSE-NEXT: movq %xmm0, %xmm0
627 ; AVX-LABEL: shuffle_v2i64_0z:
629 ; AVX-NEXT: vmovq %xmm0, %xmm0
631 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
632 ret <2 x i64> %shuffle
; Shuffle against a zero vector: mask <1,3> moves %a[1] to lane 0 and zeroes
; lane 1. The checks expect a zeroed register plus a high unpack.
635 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
636 ; SSE-LABEL: shuffle_v2i64_1z:
638 ; SSE-NEXT: pxor %xmm1, %xmm1
639 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
642 ; AVX-LABEL: shuffle_v2i64_1z:
644 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
645 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
647 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
648 ret <2 x i64> %shuffle
; Shuffle against a zero vector: mask <2,0> zeroes lane 0 and puts %a[0] in
; lane 1. The checks expect movq (zeroing the high half) then a pshufd swap.
651 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
652 ; SSE-LABEL: shuffle_v2i64_z0:
654 ; SSE-NEXT: movq %xmm0, %xmm0
655 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
658 ; AVX-LABEL: shuffle_v2i64_z0:
660 ; AVX-NEXT: vmovq %xmm0, %xmm0
661 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
663 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
664 ret <2 x i64> %shuffle
; Shuffle against a zero vector: mask <2,1> zeroes lane 0 and keeps %a[1].
; Runs before SSE4.1 expect xorps + movsd; later runs expect a zeroed
; register blended with the input.
667 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
668 ; SSE2-LABEL: shuffle_v2i64_z1:
670 ; SSE2-NEXT: xorps %xmm1, %xmm1
671 ; SSE2-NEXT: movsd %xmm1, %xmm0
674 ; SSE3-LABEL: shuffle_v2i64_z1:
676 ; SSE3-NEXT: xorps %xmm1, %xmm1
677 ; SSE3-NEXT: movsd %xmm1, %xmm0
680 ; SSSE3-LABEL: shuffle_v2i64_z1:
682 ; SSSE3-NEXT: xorps %xmm1, %xmm1
683 ; SSSE3-NEXT: movsd %xmm1, %xmm0
686 ; SSE41-LABEL: shuffle_v2i64_z1:
688 ; SSE41-NEXT: pxor %xmm1, %xmm1
689 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
690 ; SSE41-NEXT: movdqa %xmm1, %xmm0
693 ; AVX1-LABEL: shuffle_v2i64_z1:
695 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
696 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
699 ; AVX2-LABEL: shuffle_v2i64_z1:
701 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
702 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
704 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
705 ret <2 x i64> %shuffle
; Floating-point shuffle against zero: mask <0,3> keeps %a[0], zeroes lane 1.
; The checks expect the same movq/vmovq as the integer variant.
708 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
709 ; SSE-LABEL: shuffle_v2f64_0z:
711 ; SSE-NEXT: movq %xmm0, %xmm0
714 ; AVX-LABEL: shuffle_v2f64_0z:
716 ; AVX-NEXT: vmovq %xmm0, %xmm0
718 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
719 ret <2 x double> %shuffle
; Floating-point shuffle against zero: mask <1,3> moves %a[1] to lane 0 and
; zeroes lane 1, expected as xorpd + unpckhpd.
722 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
723 ; SSE-LABEL: shuffle_v2f64_1z:
725 ; SSE-NEXT: xorpd %xmm1, %xmm1
726 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
729 ; AVX-LABEL: shuffle_v2f64_1z:
731 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
732 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
734 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
735 ret <2 x double> %shuffle
; Floating-point shuffle against zero: mask <2,0> zeroes lane 0 and puts
; %a[0] in lane 1, expected as xorpd + unpcklpd (plus a copy on SSE).
738 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
739 ; SSE-LABEL: shuffle_v2f64_z0:
741 ; SSE-NEXT: xorpd %xmm1, %xmm1
742 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
743 ; SSE-NEXT: movapd %xmm1, %xmm0
746 ; AVX-LABEL: shuffle_v2f64_z0:
748 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
749 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
751 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
752 ret <2 x double> %shuffle
; Floating-point shuffle against zero: mask <2,1> zeroes lane 0 and keeps
; %a[1], expected as xorps + movsd.
755 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
756 ; SSE-LABEL: shuffle_v2f64_z1:
758 ; SSE-NEXT: xorps %xmm1, %xmm1
759 ; SSE-NEXT: movsd %xmm1, %xmm0
762 ; AVX-LABEL: shuffle_v2f64_z1:
764 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
765 ; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
767 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
768 ret <2 x double> %shuffle
; Insert the scalar i64 argument into lane 0 and zero lane 1 (mask <0,3>
; against zeroinitializer). Expected as a single GPR-to-XMM move from %rdi.
771 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
772 ; SSE-LABEL: insert_reg_and_zero_v2i64:
774 ; SSE-NEXT: movd %rdi, %xmm0
777 ; AVX-LABEL: insert_reg_and_zero_v2i64:
779 ; AVX-NEXT: vmovq %rdi, %xmm0
781 %v = insertelement <2 x i64> undef, i64 %a, i32 0
782 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
783 ret <2 x i64> %shuffle
; Insert an i64 loaded from %ptr into lane 0 and zero lane 1 (mask <0,3>
; against zeroinitializer). Expected as a single movq/vmovq from memory.
; The load defining %a is required: without it %a is an undefined value and
; the module does not parse; it mirrors the load in the v2f64 sibling test.
786 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
787 ; SSE-LABEL: insert_mem_and_zero_v2i64:
789 ; SSE-NEXT: movq (%rdi), %xmm0
792 ; AVX-LABEL: insert_mem_and_zero_v2i64:
794 ; AVX-NEXT: vmovq (%rdi), %xmm0
796 %a = load i64* %ptr
797 %v = insertelement <2 x i64> undef, i64 %a, i32 0
798 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
799 ret <2 x i64> %shuffle
; Insert the scalar double argument into lane 0 and zero lane 1 (mask <0,3>
; against zeroinitializer). Expected as a movq/vmovq of xmm0 onto itself.
802 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
803 ; SSE-LABEL: insert_reg_and_zero_v2f64:
805 ; SSE-NEXT: movq %xmm0, %xmm0
808 ; AVX-LABEL: insert_reg_and_zero_v2f64:
810 ; AVX-NEXT: vmovq %xmm0, %xmm0
812 %v = insertelement <2 x double> undef, double %a, i32 0
813 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
814 ret <2 x double> %shuffle
; Insert a double loaded from %ptr into lane 0 and zero lane 1 (mask <0,3>
; against zeroinitializer). Expected as a single movsd/vmovsd from memory.
817 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
818 ; SSE-LABEL: insert_mem_and_zero_v2f64:
820 ; SSE-NEXT: movsd (%rdi), %xmm0
823 ; AVX-LABEL: insert_mem_and_zero_v2f64:
825 ; AVX-NEXT: vmovsd (%rdi), %xmm0
827 %a = load double* %ptr
828 %v = insertelement <2 x double> undef, double %a, i32 0
829 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
830 ret <2 x double> %shuffle
; Replace lane 0 of %b with the scalar i64 argument: mask <0,3> takes the
; inserted element and %b[1]. Expected as a GPR-to-XMM move followed by
; movsd before SSE4.1 and by a blend on later runs.
833 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
834 ; SSE2-LABEL: insert_reg_lo_v2i64:
836 ; SSE2-NEXT: movd %rdi, %xmm1
837 ; SSE2-NEXT: movsd %xmm1, %xmm0
840 ; SSE3-LABEL: insert_reg_lo_v2i64:
842 ; SSE3-NEXT: movd %rdi, %xmm1
843 ; SSE3-NEXT: movsd %xmm1, %xmm0
846 ; SSSE3-LABEL: insert_reg_lo_v2i64:
848 ; SSSE3-NEXT: movd %rdi, %xmm1
849 ; SSSE3-NEXT: movsd %xmm1, %xmm0
852 ; SSE41-LABEL: insert_reg_lo_v2i64:
854 ; SSE41-NEXT: movd %rdi, %xmm1
855 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
856 ; SSE41-NEXT: movdqa %xmm1, %xmm0
859 ; AVX1-LABEL: insert_reg_lo_v2i64:
861 ; AVX1-NEXT: vmovq %rdi, %xmm1
862 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
865 ; AVX2-LABEL: insert_reg_lo_v2i64:
867 ; AVX2-NEXT: vmovq %rdi, %xmm1
868 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
870 %v = insertelement <2 x i64> undef, i64 %a, i32 0
871 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
872 ret <2 x i64> %shuffle
; Replace lane 0 of %b with an i64 loaded from %ptr: mask <0,3> takes the
; inserted element and %b[1]. Runs before SSE4.1 expect a folded movlpd;
; later runs expect a movq load followed by a blend.
; The load defining %a is required: without it %a is an undefined value and
; the module does not parse; it mirrors the load in insert_mem_lo_v2f64.
875 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
876 ; SSE2-LABEL: insert_mem_lo_v2i64:
878 ; SSE2-NEXT: movlpd (%rdi), %xmm0
881 ; SSE3-LABEL: insert_mem_lo_v2i64:
883 ; SSE3-NEXT: movlpd (%rdi), %xmm0
886 ; SSSE3-LABEL: insert_mem_lo_v2i64:
888 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
891 ; SSE41-LABEL: insert_mem_lo_v2i64:
893 ; SSE41-NEXT: movq (%rdi), %xmm1
894 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
895 ; SSE41-NEXT: movdqa %xmm1, %xmm0
898 ; AVX1-LABEL: insert_mem_lo_v2i64:
900 ; AVX1-NEXT: vmovq (%rdi), %xmm1
901 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
904 ; AVX2-LABEL: insert_mem_lo_v2i64:
906 ; AVX2-NEXT: vmovq (%rdi), %xmm1
907 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
909 %a = load i64* %ptr
910 %v = insertelement <2 x i64> undef, i64 %a, i32 0
911 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
912 ret <2 x i64> %shuffle
; Replace lane 1 of %b with the scalar i64 argument: mask <2,0> takes %b[0]
; and then the inserted element. Expected as a GPR-to-XMM move plus a low
; unpack.
915 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
916 ; SSE-LABEL: insert_reg_hi_v2i64:
918 ; SSE-NEXT: movd %rdi, %xmm1
919 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
922 ; AVX-LABEL: insert_reg_hi_v2i64:
924 ; AVX-NEXT: vmovq %rdi, %xmm1
925 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
927 %v = insertelement <2 x i64> undef, i64 %a, i32 0
928 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
929 ret <2 x i64> %shuffle
; Replace lane 1 of %b with an i64 loaded from %ptr: mask <2,0> takes %b[0]
; and then the inserted element. Expected as a movq load plus a low unpack.
; The load defining %a is required: without it %a is an undefined value and
; the module does not parse; it mirrors the load in insert_mem_hi_v2f64.
932 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
933 ; SSE-LABEL: insert_mem_hi_v2i64:
935 ; SSE-NEXT: movq (%rdi), %xmm1
936 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
939 ; AVX-LABEL: insert_mem_hi_v2i64:
941 ; AVX-NEXT: vmovq (%rdi), %xmm1
942 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
944 %a = load i64* %ptr
945 %v = insertelement <2 x i64> undef, i64 %a, i32 0
946 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
947 ret <2 x i64> %shuffle
; Replace lane 0 of %b with the scalar double argument: mask <0,3> takes the
; inserted element and %b[1]. Expected as movsd (plus a copy on SSE).
950 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
951 ; SSE-LABEL: insert_reg_lo_v2f64:
953 ; SSE-NEXT: movsd %xmm0, %xmm1
954 ; SSE-NEXT: movaps %xmm1, %xmm0
957 ; AVX-LABEL: insert_reg_lo_v2f64:
959 ; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
961 %v = insertelement <2 x double> undef, double %a, i32 0
962 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
963 ret <2 x double> %shuffle
; Replace lane 0 of %b with a double loaded from %ptr (mask <0,3>).
; Expected to fold the load into a single movlpd/vmovlpd.
966 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
967 ; SSE-LABEL: insert_mem_lo_v2f64:
969 ; SSE-NEXT: movlpd (%rdi), %xmm0
972 ; AVX-LABEL: insert_mem_lo_v2f64:
974 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
976 %a = load double* %ptr
977 %v = insertelement <2 x double> undef, double %a, i32 0
978 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
979 ret <2 x double> %shuffle
; Replace lane 1 of %b with the scalar double argument: mask <2,0> takes
; %b[0] and then the inserted element. Expected as a low unpack of
; xmm1 with xmm0 (plus a copy on SSE).
982 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
983 ; SSE-LABEL: insert_reg_hi_v2f64:
985 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
986 ; SSE-NEXT: movapd %xmm1, %xmm0
989 ; AVX-LABEL: insert_reg_hi_v2f64:
991 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
993 %v = insertelement <2 x double> undef, double %a, i32 0
994 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
995 ret <2 x double> %shuffle
; Replace lane 1 of %b with a double loaded from %ptr (mask <2,0>).
; Expected to fold the load into a single movhpd/vmovhpd.
998 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
999 ; SSE-LABEL: insert_mem_hi_v2f64:
1001 ; SSE-NEXT: movhpd (%rdi), %xmm0
1004 ; AVX-LABEL: insert_mem_hi_v2f64:
1006 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1008 %a = load double* %ptr
1009 %v = insertelement <2 x double> undef, double %a, i32 0
1010 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1011 ret <2 x double> %shuffle
; Duplicate a scalar double argument into both lanes: insert into lane 0,
; then splat with mask <0,0>. The checks record the current lowering
; (movlhps on the SSE2 run, unpcklpd/vunpcklpd elsewhere); see the FIXME.
1014 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1015 ; FIXME: We should match movddup for SSE3 and higher here.
1017 ; SSE2-LABEL: insert_dup_reg_v2f64:
1019 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1022 ; SSE3-LABEL: insert_dup_reg_v2f64:
1024 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1027 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1029 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1032 ; SSE41-LABEL: insert_dup_reg_v2f64:
1034 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1037 ; AVX-LABEL: insert_dup_reg_v2f64:
1039 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1041 %v = insertelement <2 x double> undef, double %a, i32 0
1042 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1043 ret <2 x double> %shuffle
; Duplicate a double loaded from %ptr into both lanes (insert into lane 0,
; then splat with mask <0,0>). The SSE2 run expects a load plus movlhps;
; SSE3 and later expect the load folded into movddup/vmovddup.
1045 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1046 ; SSE2-LABEL: insert_dup_mem_v2f64:
1048 ; SSE2-NEXT: movsd (%rdi), %xmm0
1049 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1052 ; SSE3-LABEL: insert_dup_mem_v2f64:
1054 ; SSE3-NEXT: movddup (%rdi), %xmm0
1057 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1059 ; SSSE3-NEXT: movddup (%rdi), %xmm0
1062 ; SSE41-LABEL: insert_dup_mem_v2f64:
1064 ; SSE41-NEXT: movddup (%rdi), %xmm0
1067 ; AVX-LABEL: insert_dup_mem_v2f64:
1069 ; AVX-NEXT: vmovddup (%rdi), %xmm0
1071 %a = load double* %ptr
1072 %v = insertelement <2 x double> undef, double %a, i32 0
1073 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1074 ret <2 x double> %shuffle
; Swap the two doubles of a vector loaded from %ptr (mask <1,0>).
; The SSE runs expect a separate movapd load followed by shufpd; the AVX
; runs expect the load folded into vpermilpd as a memory operand.
1077 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1078 ; SSE-LABEL: shuffle_mem_v2f64_10:
1080 ; SSE-NEXT: movapd (%rdi), %xmm0
1081 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1084 ; AVX-LABEL: shuffle_mem_v2f64_10:
1086 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1088 %a = load <2 x double>* %ptr
1089 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1090 ret <2 x double> %shuffle