1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
6 define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; Splat lane 0 of %a. CHECK lines pin the lowering: AVX1 duplicates the low
; half (unpcklpd) then widens with vinsertf128; AVX2 folds to one vpermpd.
7 ; AVX1-LABEL: shuffle_v4f64_0000:
9 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v4f64_0000:
15 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
17 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18 ret <4 x double> %shuffle
21 define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; Mask <0,0,0,1> of %a only: AVX1 builds the splat half in xmm1 and reinserts
; the original low half; AVX2 again needs only a single vpermpd.
22 ; AVX1-LABEL: shuffle_v4f64_0001:
24 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
25 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
28 ; AVX2-LABEL: shuffle_v4f64_0001:
30 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
32 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
33 ret <4 x double> %shuffle
36 define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; Mask <0,0,2,0> crosses the 128-bit lanes, so AVX1 must extract the high
; half and recombine with unpck/insert; AVX2 handles it with one vpermpd.
37 ; AVX1-LABEL: shuffle_v4f64_0020:
39 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
40 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
41 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
42 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
45 ; AVX2-LABEL: shuffle_v4f64_0020:
47 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
49 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
50 ret <4 x double> %shuffle
53 define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; Mask <0,3,0,0>: AVX1 swaps the 128-bit halves (vperm2f128), permutes in
; lane, then blends element 0 back in; AVX2 collapses to one vpermpd.
54 ; AVX1-LABEL: shuffle_v4f64_0300:
56 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
57 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
58 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
61 ; AVX2-LABEL: shuffle_v4f64_0300:
63 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
65 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
66 ret <4 x double> %shuffle
69 define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; Mask <1,0,0,0>: AVX1 forms each half in an xmm (swap low pair, splat elt 0)
; and concatenates with vinsertf128; AVX2 is a single vpermpd.
70 ; AVX1-LABEL: shuffle_v4f64_1000:
72 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
73 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
74 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
77 ; AVX2-LABEL: shuffle_v4f64_1000:
79 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
81 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
82 ret <4 x double> %shuffle
85 define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; Mask <2,2,0,0>: AVX1 swaps halves then splats within each lane via
; vpermilpd; AVX2 uses one cross-lane vpermpd.
86 ; AVX1-LABEL: shuffle_v4f64_2200:
88 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
89 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
92 ; AVX2-LABEL: shuffle_v4f64_2200:
94 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
96 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
97 ret <4 x double> %shuffle
100 define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; Mask <3,3,3,0>: AVX1 needs a half swap, two in-lane permutes, and a blend;
; AVX2 does it in one vpermpd.
101 ; AVX1-LABEL: shuffle_v4f64_3330:
103 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
104 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
105 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
106 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
109 ; AVX2-LABEL: shuffle_v4f64_3330:
111 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
113 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
114 ret <4 x double> %shuffle
117 define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; Full reverse <3,2,1,0>: AVX1 swaps halves then reverses within each lane;
; AVX2 lowers to a single vpermpd.
118 ; AVX1-LABEL: shuffle_v4f64_3210:
120 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
121 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
124 ; AVX2-LABEL: shuffle_v4f64_3210:
126 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
128 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
129 ret <4 x double> %shuffle
132 define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; In-lane mask <0,0,2,3>: both subtargets use one vpermilpd (ALL prefix).
133 ; ALL-LABEL: shuffle_v4f64_0023:
135 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
137 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
138 ret <4 x double> %shuffle
141 define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; In-lane even-element splat <0,0,2,2>: single vpermilpd on both subtargets.
142 ; ALL-LABEL: shuffle_v4f64_0022:
144 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
146 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
147 ret <4 x double> %shuffle
150 define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; Swap within each 128-bit lane <1,0,3,2>: one vpermilpd suffices.
151 ; ALL-LABEL: shuffle_v4f64_1032:
153 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
155 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
156 ret <4 x double> %shuffle
159 define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; In-lane odd-element splat <1,1,3,3>: single vpermilpd on both subtargets.
160 ; ALL-LABEL: shuffle_v4f64_1133:
162 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
164 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
165 ret <4 x double> %shuffle
168 define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; In-lane mask <1,0,2,3> (swap low pair only): one vpermilpd.
169 ; ALL-LABEL: shuffle_v4f64_1023:
171 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
173 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
174 ret <4 x double> %shuffle
177 define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; In-lane mask <1,0,2,2>: one vpermilpd on both subtargets.
178 ; ALL-LABEL: shuffle_v4f64_1022:
180 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
182 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
183 ret <4 x double> %shuffle
186 define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; Two-input mask <0,4,2,3>: duplicate %b's even elements, then blend one
; element of %b into %a.
187 ; ALL-LABEL: shuffle_v4f64_0423:
189 ; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
190 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
192 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
193 ret <4 x double> %shuffle
196 define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; Two-input mask <0,4,6,2>: permute both operands in lane, then blend.
197 ; ALL-LABEL: shuffle_v4f64_0462:
199 ; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
200 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
201 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
203 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
204 ret <4 x double> %shuffle
207 define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; Mask <0,4,2,6> is exactly the interleave-low pattern: one vunpcklpd.
208 ; ALL-LABEL: shuffle_v4f64_0426:
210 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
212 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
213 ret <4 x double> %shuffle
216 define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; Mask <1,5,3,7> is the interleave-high pattern: one vunpckhpd.
217 ; ALL-LABEL: shuffle_v4f64_1537:
219 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
221 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
222 ret <4 x double> %shuffle
225 define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; Mask <4,0,6,2>: interleave-low with the operands commuted (ymm1 first).
226 ; ALL-LABEL: shuffle_v4f64_4062:
228 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
230 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
231 ret <4 x double> %shuffle
234 define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; Mask <5,1,7,3>: interleave-high with the operands commuted.
235 ; ALL-LABEL: shuffle_v4f64_5173:
237 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
239 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
240 ret <4 x double> %shuffle
243 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; Mask <5,1,6,3> fits the per-lane element selection of a single vshufpd.
244 ; ALL-LABEL: shuffle_v4f64_5163:
246 ; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
248 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
249 ret <4 x double> %shuffle
252 define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; Mask <0,5,2,7> picks each element from its own position: a pure vblendpd.
253 ; ALL-LABEL: shuffle_v4f64_0527:
255 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
257 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
258 ret <4 x double> %shuffle
261 define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; Mask <4,1,6,3>: the complementary blend (operands swapped vs. 0527).
262 ; ALL-LABEL: shuffle_v4f64_4163:
264 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
266 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
267 ret <4 x double> %shuffle
270 define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; Concatenate low halves <0,1,4,5>: AVX1 widens %b's low xmm and blends;
; AVX2 uses vpermpd on %b before the blend.
271 ; AVX1-LABEL: shuffle_v4f64_0145:
273 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
274 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
277 ; AVX2-LABEL: shuffle_v4f64_0145:
279 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
280 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
282 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
283 ret <4 x double> %shuffle
286 define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; Mask <4,5,0,1> (%b low half then %a low half): duplicate %a's low 128 bits
; into the high half, then blend with %b.
287 ; AVX1-LABEL: shuffle_v4f64_4501:
289 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
290 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
293 ; AVX2-LABEL: shuffle_v4f64_4501:
295 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
296 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
298 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
299 ret <4 x double> %shuffle
302 define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; Mask <0,1,6,7> keeps each half in place: a single vblendpd.
303 ; ALL-LABEL: shuffle_v4f64_0167:
305 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
307 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
308 ret <4 x double> %shuffle
311 define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; Integer splat of lane 0: AVX1 reuses the FP-domain unpck/insert sequence
; (no 256-bit integer shuffles on AVX1); AVX2 uses one vpermq.
312 ; AVX1-LABEL: shuffle_v4i64_0000:
314 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
315 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
318 ; AVX2-LABEL: shuffle_v4i64_0000:
320 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
322 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
323 ret <4 x i64> %shuffle
326 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; Integer mask <0,0,0,1>: same FP-domain sequence as the f64 variant on
; AVX1; single vpermq on AVX2.
327 ; AVX1-LABEL: shuffle_v4i64_0001:
329 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
330 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
333 ; AVX2-LABEL: shuffle_v4i64_0001:
335 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
337 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
338 ret <4 x i64> %shuffle
341 define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; Cross-lane integer mask <0,0,2,0>: AVX1 extracts/recombines halves;
; AVX2 folds to one vpermq.
342 ; AVX1-LABEL: shuffle_v4i64_0020:
344 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
345 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
346 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
347 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
350 ; AVX2-LABEL: shuffle_v4i64_0020:
352 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
354 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
355 ret <4 x i64> %shuffle
358 define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; Mask <0,1,1,2>: AVX1 builds the new high half from elements 1 and 2 with
; vshufpd, then reinserts; AVX2 is one vpermq.
359 ; AVX1-LABEL: shuffle_v4i64_0112:
361 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
362 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
363 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
366 ; AVX2-LABEL: shuffle_v4i64_0112:
368 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
370 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
371 ret <4 x i64> %shuffle
374 define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; Integer mask <0,3,0,0>: identical AVX1 FP-domain lowering to the f64
; version; AVX2 uses vpermq instead of vpermpd.
375 ; AVX1-LABEL: shuffle_v4i64_0300:
377 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
378 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
379 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
382 ; AVX2-LABEL: shuffle_v4i64_0300:
384 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
386 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
387 ret <4 x i64> %shuffle
390 define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; Integer mask <1,0,0,0>: per-half xmm construction on AVX1; vpermq on AVX2.
391 ; AVX1-LABEL: shuffle_v4i64_1000:
393 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
394 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
395 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
398 ; AVX2-LABEL: shuffle_v4i64_1000:
400 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
402 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
403 ret <4 x i64> %shuffle
406 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; Integer mask <2,2,0,0>: half swap plus in-lane permute on AVX1; one vpermq
; on AVX2.
407 ; AVX1-LABEL: shuffle_v4i64_2200:
409 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
410 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
413 ; AVX2-LABEL: shuffle_v4i64_2200:
415 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
417 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
418 ret <4 x i64> %shuffle
421 define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; Integer mask <3,3,3,0>: same four-instruction AVX1 sequence as the f64
; variant; single vpermq on AVX2.
422 ; AVX1-LABEL: shuffle_v4i64_3330:
424 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
425 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
426 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
427 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
430 ; AVX2-LABEL: shuffle_v4i64_3330:
432 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
434 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
435 ret <4 x i64> %shuffle
438 define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; Full integer reverse <3,2,1,0>: half swap plus in-lane reverse on AVX1;
; one vpermq on AVX2.
439 ; AVX1-LABEL: shuffle_v4i64_3210:
441 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
442 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
445 ; AVX2-LABEL: shuffle_v4i64_3210:
447 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
449 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
450 ret <4 x i64> %shuffle
453 define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <0,1,2,4>: AVX2 stays in the integer domain (vpermq +
; vpblendd); AVX1 falls back to FP-domain ops.
454 ; AVX1-LABEL: shuffle_v4i64_0124:
456 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
457 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
458 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
461 ; AVX2-LABEL: shuffle_v4i64_0124:
463 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,0]
464 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
466 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
467 ret <4 x i64> %shuffle
470 define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <0,1,4,2>: both operands are permuted into position and
; blended; AVX2 does it with integer-domain vpermq/vpblendd.
471 ; AVX1-LABEL: shuffle_v4i64_0142:
473 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
474 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
475 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
478 ; AVX2-LABEL: shuffle_v4i64_0142:
480 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,0,3]
481 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
482 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
484 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
485 ret <4 x i64> %shuffle
488 define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <0,4,1,2>: %a is rotated into <0,1,2> positions, then one
; element of %b is blended in; AVX2 keeps everything in the integer domain.
489 ; AVX1-LABEL: shuffle_v4i64_0412:
491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
492 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
493 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
494 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
495 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
498 ; AVX2-LABEL: shuffle_v4i64_0412:
500 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
501 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
502 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
504 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
505 ret <4 x i64> %shuffle
508 define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <4,0,1,2>: rotate %a right by one element, then blend %b's
; element 0 into lane 0.
509 ; AVX1-LABEL: shuffle_v4i64_4012:
511 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
512 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
513 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
514 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
515 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
518 ; AVX2-LABEL: shuffle_v4i64_4012:
520 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
521 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
523 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
524 ret <4 x i64> %shuffle
527 define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; Concatenate low halves <0,1,4,5>: FP blend on AVX1, integer vpblendd on
; AVX2.
528 ; AVX1-LABEL: shuffle_v4i64_0145:
530 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
531 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
534 ; AVX2-LABEL: shuffle_v4i64_0145:
536 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,0,1]
537 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
539 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
540 ret <4 x i64> %shuffle
543 define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <0,4,5,1>: both operands are rearranged, then blended;
; AVX2 uses two vpermq plus a vpblendd.
544 ; AVX1-LABEL: shuffle_v4i64_0451:
546 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
547 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm1[0,0]
548 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
549 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
550 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
553 ; AVX2-LABEL: shuffle_v4i64_0451:
555 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
556 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
557 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
559 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
560 ret <4 x i64> %shuffle
563 define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; Mask <4,5,0,1>: duplicate %a's low half into the high half, blend with %b.
564 ; AVX1-LABEL: shuffle_v4i64_4501:
566 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
567 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
570 ; AVX2-LABEL: shuffle_v4i64_4501:
572 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
573 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
575 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
576 ret <4 x i64> %shuffle
579 define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; Two-input mask <4,0,1,5>: mirror of 0451 with the operand roles swapped.
580 ; AVX1-LABEL: shuffle_v4i64_4015:
582 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
583 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0,0]
584 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
585 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
586 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3]
589 ; AVX2-LABEL: shuffle_v4i64_4015:
591 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
592 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
593 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
595 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
596 ret <4 x i64> %shuffle
599 define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; Mask <2,undef,3,5>: exercises undef-element handling — the lowering is
; free to choose any value for lane 1, and the CHECK lines pin what the
; shuffle lowering actually picks.
600 ; AVX1-LABEL: shuffle_v4i64_2u35:
602 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
603 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
604 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
605 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
606 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
609 ; AVX2-LABEL: shuffle_v4i64_2u35:
611 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
612 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
613 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
615 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
616 ret <4 x i64> %shuffle
619 define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; Two-input cross-lane mask <1,2,5,1>: AVX1 needs a half swap, vshufpd, and
; a blend; AVX2 uses two vpermq plus vpblendd.
620 ; AVX1-LABEL: shuffle_v4i64_1251:
622 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
623 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
624 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
625 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
626 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
629 ; AVX2-LABEL: shuffle_v4i64_1251:
631 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
632 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
633 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
635 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
636 ret <4 x i64> %shuffle
639 define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; Stress test: a chain of four shufflevectors (all deriving from %b; %a is
; intentionally unused) that the shuffle combiner must collapse.
; FIX: the function computed %f but had no terminator, which is invalid
; LLVM IR — return the final shuffle result.
640 ; AVX1-LABEL: stress_test1:
642 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
643 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm0[1,0,3,2]
644 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
645 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
646 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
647 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
648 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
649 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
650 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
651 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
654 ; AVX2-LABEL: stress_test1:
656 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm1[3,1,1,0]
657 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3]
658 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,3,1,3]
659 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
660 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,0]
661 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
663 %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
664 %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
665 %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
666 %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
 ret <4 x i64> %f
671 define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; Move a scalar i64 into lane 0 and zero lanes 1-3: vmovq implicitly zeroes
; the upper bits, then a blend against a zeroed ymm finishes the vector.
672 ; AVX1-LABEL: insert_reg_and_zero_v4i64:
674 ; AVX1-NEXT: vmovq %rdi, %xmm0
675 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
676 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
679 ; AVX2-LABEL: insert_reg_and_zero_v4i64:
681 ; AVX2-NEXT: vmovq %rdi, %xmm0
682 ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
683 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
685 %v = insertelement <4 x i64> undef, i64 %a, i64 0
686 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
687 ret <4 x i64> %shuffle
690 define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; Load a scalar i64 from memory into lane 0 and zero lanes 1-3 (memory
; counterpart of insert_reg_and_zero_v4i64).
691 ; AVX1-LABEL: insert_mem_and_zero_v4i64:
693 ; AVX1-NEXT: vmovq (%rdi), %xmm0
694 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
695 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
698 ; AVX2-LABEL: insert_mem_and_zero_v4i64:
700 ; AVX2-NEXT: vmovq (%rdi), %xmm0
701 ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
702 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
 ; FIX: %a was used below without being defined — load it from %ptr (same
 ; old typed-pointer load syntax this file uses in insert_mem_and_zero_v4f64).
 %a = load i64* %ptr
705 %v = insertelement <4 x i64> undef, i64 %a, i64 0
706 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
707 ret <4 x i64> %shuffle
710 define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; Scalar double already arrives in xmm0 (the "kill" line documents the
; register-class widening), so only the zero/blend pair is emitted. The same
; ALL sequence is expected on both AVX1 and AVX2.
711 ; ALL-LABEL: insert_reg_and_zero_v4f64:
713 ; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
714 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
715 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
717 %v = insertelement <4 x double> undef, double %a, i32 0
718 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
719 ret <4 x double> %shuffle
722 define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; Load a scalar double into lane 0 and zero lanes 1-3: vmovsd zeroes the
; rest of xmm0, then the zero/blend pair builds the full ymm.
723 ; ALL-LABEL: insert_mem_and_zero_v4f64:
725 ; ALL-NEXT: vmovsd (%rdi), %xmm0
726 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
727 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
729 %a = load double* %ptr
730 %v = insertelement <4 x double> undef, double %a, i32 0
731 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
732 ret <4 x double> %shuffle