1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Single-input shuffle of %a: result index 3 takes element 4 (the first element
; of the upper 128-bit half); every other result element is element 0.
; With only +avx the expected sequence swaps the two 128-bit halves into ymm1,
; does per-half permutes of both copies, and blends them. With +avx2 a single
; vpermps driven by a constant index vector is expected instead.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
81 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
96 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
99 ; AVX2-LABEL: shuffle_v8f32_00500000:
101 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
102 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
104 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
105 ret <8 x float> %shuffle
108 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
109 ; AVX1-LABEL: shuffle_v8f32_06000000:
111 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
113 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
114 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
117 ; AVX2-LABEL: shuffle_v8f32_06000000:
119 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
120 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
122 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
123 ret <8 x float> %shuffle
; Single-input shuffle of %a: result index 0 takes element 7 (last element of
; the upper 128-bit half); every other result element is element 0.
; Unlike the neighbouring single-odd-element tests, the +avx2 expectation does
; not load the vpermps index vector as a constant: it is assembled in registers
; (zero xmm1, insert the immediate 7 into dword 0, insert that into a zeroed
; ymm) before the vpermps.
126 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
127 ; AVX1-LABEL: shuffle_v8f32_70000000:
129 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
130 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
131 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
132 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
135 ; AVX2-LABEL: shuffle_v8f32_70000000:
137 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
138 ; AVX2-NEXT: movl $7, %eax
139 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
140 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
141 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
142 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
144 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
145 ret <8 x float> %shuffle
148 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
149 ; ALL-LABEL: shuffle_v8f32_01014545:
151 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
153 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
154 ret <8 x float> %shuffle
157 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
158 ; AVX1-LABEL: shuffle_v8f32_00112233:
160 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
161 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
162 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
165 ; AVX2-LABEL: shuffle_v8f32_00112233:
167 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
168 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
170 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
171 ret <8 x float> %shuffle
174 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
175 ; AVX1-LABEL: shuffle_v8f32_00001111:
177 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
178 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
179 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
182 ; AVX2-LABEL: shuffle_v8f32_00001111:
184 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
185 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
187 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
188 ret <8 x float> %shuffle
191 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
192 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
194 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
196 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
197 ret <8 x float> %shuffle
200 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
201 ; AVX1-LABEL: shuffle_v8f32_08080808:
203 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
204 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
205 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
206 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
207 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
210 ; AVX2-LABEL: shuffle_v8f32_08080808:
212 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
213 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
214 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
216 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
217 ret <8 x float> %shuffle
; Two-input shuffle that repeats the same pattern in each 128-bit half:
; alternate the first element of %a's half with the first element of %b's half
; (mask 0,8,0,8 / 4,12,4,12).
; Expected lowering is two in-half shuffles: the first gathers a,a,b,b per half,
; the second reorders each half as [0,2,1,3] to produce the a,b,a,b interleave.
220 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
221 ; ALL-LABEL: shuffle_v8f32_08084c4c:
223 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
224 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
226 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
227 ret <8 x float> %shuffle
230 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
231 ; ALL-LABEL: shuffle_v8f32_8823cc67:
233 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
235 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
236 ret <8 x float> %shuffle
239 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
240 ; ALL-LABEL: shuffle_v8f32_9832dc76:
242 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
244 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
245 ret <8 x float> %shuffle
248 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
249 ; ALL-LABEL: shuffle_v8f32_9810dc54:
251 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
253 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
254 ret <8 x float> %shuffle
257 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
258 ; ALL-LABEL: shuffle_v8f32_08194c5d:
260 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
262 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
263 ret <8 x float> %shuffle
266 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
267 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
269 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
271 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
272 ret <8 x float> %shuffle
275 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
276 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
278 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
279 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
280 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
283 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
285 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
286 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
287 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
288 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
289 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
291 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
292 ret <8 x float> %shuffle
; Two-input shuffle that reads only the low four elements of each input:
; result = a0,b0,b1,b1, a1,b2,b3,b3 (mask 0,8,9,9,1,10,11,11).
; With only +avx each 128-bit half of the result is built separately from the
; xmm registers by a pair of vshufps (xmm2 = a1,b2,b3,b3 and xmm0 = a0,b0,b1,b1)
; and the halves are then joined with vinsertf128.
; With +avx2 each input is permuted across the full register with vpermps and
; the two results are blended.
295 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
296 ; AVX1-LABEL: shuffle_v8f32_08991abb:
298 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
299 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
300 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
301 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
302 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
305 ; AVX2-LABEL: shuffle_v8f32_08991abb:
307 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
308 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
309 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
310 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
311 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
313 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
314 ret <8 x float> %shuffle
317 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
318 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
320 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
321 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
322 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
323 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
326 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
328 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
329 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
330 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
332 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
333 ret <8 x float> %shuffle
336 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
337 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
339 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
340 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
341 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
344 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
346 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
347 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
348 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
350 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
351 ret <8 x float> %shuffle
354 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
355 ; ALL-LABEL: shuffle_v8f32_00014445:
357 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
359 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
360 ret <8 x float> %shuffle
363 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
364 ; ALL-LABEL: shuffle_v8f32_00204464:
366 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
368 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
369 ret <8 x float> %shuffle
372 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
373 ; ALL-LABEL: shuffle_v8f32_03004744:
375 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
377 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
378 ret <8 x float> %shuffle
381 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
382 ; ALL-LABEL: shuffle_v8f32_10005444:
384 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
386 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
387 ret <8 x float> %shuffle
390 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
391 ; ALL-LABEL: shuffle_v8f32_22006644:
393 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
395 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
396 ret <8 x float> %shuffle
399 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
400 ; ALL-LABEL: shuffle_v8f32_33307774:
402 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
404 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
405 ret <8 x float> %shuffle
408 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
409 ; ALL-LABEL: shuffle_v8f32_32107654:
411 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
413 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
414 ret <8 x float> %shuffle
417 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
418 ; ALL-LABEL: shuffle_v8f32_00234467:
420 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
422 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
423 ret <8 x float> %shuffle
426 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
427 ; ALL-LABEL: shuffle_v8f32_00224466:
429 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
431 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
432 ret <8 x float> %shuffle
435 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
436 ; ALL-LABEL: shuffle_v8f32_10325476:
438 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
440 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
441 ret <8 x float> %shuffle
444 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
445 ; ALL-LABEL: shuffle_v8f32_11335577:
447 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
449 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
450 ret <8 x float> %shuffle
453 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
454 ; ALL-LABEL: shuffle_v8f32_10235467:
456 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
458 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
459 ret <8 x float> %shuffle
462 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
463 ; ALL-LABEL: shuffle_v8f32_10225466:
465 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
467 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
468 ret <8 x float> %shuffle
471 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
472 ; ALL-LABEL: shuffle_v8f32_00015444:
474 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
476 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
477 ret <8 x float> %shuffle
480 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
481 ; ALL-LABEL: shuffle_v8f32_00204644:
483 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
485 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
486 ret <8 x float> %shuffle
489 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
490 ; ALL-LABEL: shuffle_v8f32_03004474:
492 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
494 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
495 ret <8 x float> %shuffle
498 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
499 ; ALL-LABEL: shuffle_v8f32_10004444:
501 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
503 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
504 ret <8 x float> %shuffle
507 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
508 ; ALL-LABEL: shuffle_v8f32_22006446:
510 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
512 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
513 ret <8 x float> %shuffle
516 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
517 ; ALL-LABEL: shuffle_v8f32_33307474:
519 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
521 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
522 ret <8 x float> %shuffle
525 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
526 ; ALL-LABEL: shuffle_v8f32_32104567:
528 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
530 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
531 ret <8 x float> %shuffle
534 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
535 ; ALL-LABEL: shuffle_v8f32_00236744:
537 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
539 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
540 ret <8 x float> %shuffle
543 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
544 ; ALL-LABEL: shuffle_v8f32_00226644:
546 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
548 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
549 ret <8 x float> %shuffle
552 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
553 ; ALL-LABEL: shuffle_v8f32_10324567:
555 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
557 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
558 ret <8 x float> %shuffle
561 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
562 ; ALL-LABEL: shuffle_v8f32_11334567:
564 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
566 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
567 ret <8 x float> %shuffle
570 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
571 ; ALL-LABEL: shuffle_v8f32_01235467:
573 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
575 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
576 ret <8 x float> %shuffle
579 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
580 ; ALL-LABEL: shuffle_v8f32_01235466:
582 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
584 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
585 ret <8 x float> %shuffle
588 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
589 ; ALL-LABEL: shuffle_v8f32_002u6u44:
591 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
593 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
594 ret <8 x float> %shuffle
597 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
598 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
600 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
602 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
603 ret <8 x float> %shuffle
606 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
607 ; ALL-LABEL: shuffle_v8f32_103245uu:
609 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
611 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
612 ret <8 x float> %shuffle
615 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
616 ; ALL-LABEL: shuffle_v8f32_1133uu67:
618 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
620 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
621 ret <8 x float> %shuffle
624 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
625 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
627 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
629 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
630 ret <8 x float> %shuffle
633 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
634 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
636 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
638 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
639 ret <8 x float> %shuffle
; Irregular two-input shuffle; the hex digits of the name are the mask indices
; (12,3,4,8,12,13,10,0), where 0-7 select from %a and 8-15 select from %b.
; Both inputs contribute elements that move between the two 128-bit halves.
; With only +avx each input is combined with a half-swapped copy of itself
; (vperm2f128 ymm[2,3,0,1]) using in-half shuffles and blends, and the two
; per-input results are merged by the final vblendps.
; With +avx2 each input gets one vpermps with a partially-undef index vector,
; followed by the same final blend.
642 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
643 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
645 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
646 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
647 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
648 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
649 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
650 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
653 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
655 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
656 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
657 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
658 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
659 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
661 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
662 ret <8 x float> %shuffle
; Irregular two-input shuffle; the hex digits of the name are the mask indices
; (15,5,1,1,2,3,5,10), where 0-7 select from %a and 8-15 select from %b.
; Only result elements 0 and 7 come from %b; the middle six come from %a.
; With only +avx the %a part is assembled from %a and a half-swapped copy of
; %a (64-bit-granular permute plus blend), the %b part from a half-swapped and
; permuted copy of %b, and a final vblendps merges the two.
; With +avx2 each input gets one vpermps with a partially-undef index vector,
; followed by the same final blend.
665 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
666 ; AVX1-LABEL: shuffle_v8f32_f511235a:
668 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
669 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
670 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
671 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
672 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
673 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
674 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
677 ; AVX2-LABEL: shuffle_v8f32_f511235a:
679 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
680 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
681 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
682 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
683 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
685 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
686 ret <8 x float> %shuffle
689 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
690 ; AVX1-LABEL: shuffle_v8f32_32103210:
692 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
693 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
696 ; AVX2-LABEL: shuffle_v8f32_32103210:
698 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
699 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
701 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
702 ret <8 x float> %shuffle
705 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
706 ; AVX1-LABEL: shuffle_v8f32_76547654:
708 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
709 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
710 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
713 ; AVX2-LABEL: shuffle_v8f32_76547654:
715 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
716 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
718 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
719 ret <8 x float> %shuffle
722 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
723 ; AVX1-LABEL: shuffle_v8f32_76543210:
725 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
726 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
729 ; AVX2-LABEL: shuffle_v8f32_76543210:
731 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
732 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
734 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
735 ret <8 x float> %shuffle
738 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
739 ; ALL-LABEL: shuffle_v8f32_3210ba98:
741 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
742 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
744 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
745 ret <8 x float> %shuffle
748 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
749 ; ALL-LABEL: shuffle_v8f32_3210fedc:
751 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
752 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
754 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
755 ret <8 x float> %shuffle
758 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
759 ; ALL-LABEL: shuffle_v8f32_7654fedc:
761 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
762 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
764 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
765 ret <8 x float> %shuffle
768 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
769 ; ALL-LABEL: shuffle_v8f32_fedc7654:
771 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
772 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
774 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
775 ret <8 x float> %shuffle
778 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
779 ; ALL-LABEL: shuffle_v8f32_ba987654:
781 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
782 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
784 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
785 ret <8 x float> %shuffle
; Two-input shuffle: the low 128-bit half of the result is the low half of %b
; reversed (mask indices 11,10,9,8) and the high half is the low half of %a
; reversed (mask indices 3,2,1,0), as the hex digits of the function name state.
; The mask previously duplicated shuffle_v8f32_ba987654, leaving this pattern
; untested. Expected lowering mirrors shuffle_v8f32_3210ba98 with the inputs
; commuted: place %a's low half above %b's low half, then reverse within halves.
788 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
789 ; ALL-LABEL: shuffle_v8f32_ba983210:
791 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
792 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
794 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
795 ret <8 x float> %shuffle
798 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
799 ; AVX1-LABEL: shuffle_v8i32_00000000:
801 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
802 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
805 ; AVX2-LABEL: shuffle_v8i32_00000000:
807 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
809 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
810 ret <8 x i32> %shuffle
813 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
814 ; AVX1-LABEL: shuffle_v8i32_00000010:
816 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
817 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
818 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
821 ; AVX2-LABEL: shuffle_v8i32_00000010:
823 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
824 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
826 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
827 ret <8 x i32> %shuffle
; %a element 0 in every lane except result lane 5, which takes element 2. All sources come from the low 128 bits of %a.
830 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
831 ; AVX1-LABEL: shuffle_v8i32_00000200:
833 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
834 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
835 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
838 ; AVX2-LABEL: shuffle_v8i32_00000200:
840 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
841 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
843 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
844 ret <8 x i32> %shuffle
; %a element 0 in every lane except result lane 4, which takes element 3. All sources come from the low 128 bits of %a.
847 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
848 ; AVX1-LABEL: shuffle_v8i32_00003000:
850 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
851 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
852 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
855 ; AVX2-LABEL: shuffle_v8i32_00003000:
857 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
858 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
860 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
861 ret <8 x i32> %shuffle
; %a element 0 everywhere except result lane 3, which takes element 4 from the high 128-bit half. The AVX1 run expects a half swap (vperm2f128), two in-lane permutes and a blend; the AVX2 run expects one vpermd.
864 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
865 ; AVX1-LABEL: shuffle_v8i32_00040000:
867 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
868 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
869 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
870 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
873 ; AVX2-LABEL: shuffle_v8i32_00040000:
875 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
876 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
878 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
879 ret <8 x i32> %shuffle
; %a element 0 everywhere except result lane 2, which takes element 5 from the high 128-bit half. The AVX1 run expects a half swap, two in-lane permutes and a blend; the AVX2 run expects one vpermd.
882 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
883 ; AVX1-LABEL: shuffle_v8i32_00500000:
885 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
886 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
887 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
888 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
891 ; AVX2-LABEL: shuffle_v8i32_00500000:
893 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
894 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
896 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
897 ret <8 x i32> %shuffle
; %a element 0 everywhere except result lane 1, which takes element 6 from the high 128-bit half. The AVX1 run expects a half swap, two in-lane permutes and a blend; the AVX2 run expects one vpermd.
900 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
901 ; AVX1-LABEL: shuffle_v8i32_06000000:
903 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
904 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
905 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
906 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
909 ; AVX2-LABEL: shuffle_v8i32_06000000:
911 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
912 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
914 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
915 ret <8 x i32> %shuffle
; %a element 0 everywhere except result lane 0, which takes element 7 from the high 128-bit half. Here the AVX2 expectation builds the vpermd index vector in registers (vpxor/movl/vpinsrd/vinserti128) rather than loading it with vmovdqa.
918 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
919 ; AVX1-LABEL: shuffle_v8i32_70000000:
921 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
922 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
923 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
924 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
927 ; AVX2-LABEL: shuffle_v8i32_70000000:
929 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
930 ; AVX2-NEXT: movl $7, %eax
931 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
932 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
933 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
934 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
936 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
937 ret <8 x i32> %shuffle
; Repeats the low element pair of each 128-bit half of %a (0,1 and 4,5). The AVX1 run expects this as a 64-bit duplicate (vmovddup), the AVX2 run as an in-lane vpshufd.
940 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
941 ; AVX1-LABEL: shuffle_v8i32_01014545:
943 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
946 ; AVX2-LABEL: shuffle_v8i32_01014545:
948 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
950 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
951 ret <8 x i32> %shuffle
; Duplicates each of the four low elements of %a into an adjacent lane pair (0,0,1,1,2,2,3,3). The AVX1 run expects two 128-bit self-unpacks joined by vinsertf128; the AVX2 run expects one vpermd.
954 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
955 ; AVX1-LABEL: shuffle_v8i32_00112233:
957 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
958 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
959 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
962 ; AVX2-LABEL: shuffle_v8i32_00112233:
964 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
965 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
967 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
968 ret <8 x i32> %shuffle
; Low half of the result splats %a element 0, high half splats %a element 1.
971 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
972 ; AVX1-LABEL: shuffle_v8i32_00001111:
974 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
975 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
976 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
979 ; AVX2-LABEL: shuffle_v8i32_00001111:
981 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
982 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
984 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
985 ret <8 x i32> %shuffle
; Pure lane-wise blend: even result lanes come from %b, odd lanes from %a, every element staying in its own position. A single blend instruction is expected on both targets.
988 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
989 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
991 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
994 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
996 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
998 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
999 ret <8 x i32> %shuffle
; Alternates %a element 0 (even lanes) with %b element 0 (odd lanes) across the whole vector. The AVX2 run expects two broadcasts and a blend.
1002 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1003 ; AVX1-LABEL: shuffle_v8i32_08080808:
1005 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1006 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1007 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1008 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1009 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1012 ; AVX2-LABEL: shuffle_v8i32_08080808:
1014 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1015 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1016 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1018 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1019 ret <8 x i32> %shuffle
; Alternates %a and %b lanes, using element 0 of each input in the low half and element 4 of each in the high half, so the pattern is the same within both 128-bit halves.
1022 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1023 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1025 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1026 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1029 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1031 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1032 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1033 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1035 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1036 ret <8 x i32> %shuffle
; Per 128-bit half: the first two lanes splat that half's first %b element, the last two lanes pass %a through unchanged. The AVX1 run expects a single two-input vshufps.
1039 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1040 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1042 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1045 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1047 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1048 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1050 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1051 ret <8 x i32> %shuffle
; Per 128-bit half: %b's low element pair swapped, followed by %a's high element pair swapped. The AVX1 run expects a single two-input vshufps.
1054 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1055 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1057 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1060 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1062 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
1063 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1064 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1066 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1067 ret <8 x i32> %shuffle
; Per 128-bit half: %b's low element pair swapped, followed by %a's low element pair swapped. The AVX1 run expects a single two-input vshufps.
1070 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1071 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1073 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1076 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1078 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1079 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1080 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1082 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1083 ret <8 x i32> %shuffle
; Interleaves the low two elements of each 128-bit half of %a with the matching elements of %b; a single unpack-low instruction is expected on both targets.
1086 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1087 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1089 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1092 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1094 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1096 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1097 ret <8 x i32> %shuffle
; Interleaves the high two elements of each 128-bit half of %a with the matching elements of %b; a single unpack-high instruction is expected on both targets.
1100 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1101 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1103 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1106 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1108 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1110 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1111 ret <8 x i32> %shuffle
; Full-width interleave of the low four elements of %a with the low four elements of %b; unlike the per-half unpack patterns, this one crosses the 128-bit boundary.
1114 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1115 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1117 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1118 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1119 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1122 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1124 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1125 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1126 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1127 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1130 ret <8 x i32> %shuffle
; Result lanes 0 and 4 take %a elements 0 and 1; the other lanes take %b elements 0,1,1 (low half) and 2,3,3 (high half). Only the low 128 bits of either input are read.
1133 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1134 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1136 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
1137 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
1138 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1139 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
1140 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1143 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1145 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1146 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1147 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1148 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1149 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1151 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1152 ret <8 x i32> %shuffle
; Even result lanes take %a elements 0,1,2,3; odd lanes keep %b elements 1,3,5,7 in their original positions. Only %a needs rearranging before the final blend.
1155 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1156 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1158 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1159 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1160 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1161 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1164 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1166 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1167 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1169 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1170 ret <8 x i32> %shuffle
; %b passes through unchanged in lanes 1-3 and 5-7; lanes 0 and 4 take %a elements 0 and 1. Only %a needs rearranging before the final blend.
1173 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1174 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1176 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1177 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1178 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1181 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1183 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1184 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1185 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1187 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1188 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [0,0,0,1] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1191 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1192 ; AVX1-LABEL: shuffle_v8i32_00014445:
1194 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1197 ; AVX2-LABEL: shuffle_v8i32_00014445:
1199 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1201 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1202 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [0,0,2,0] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1205 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1206 ; AVX1-LABEL: shuffle_v8i32_00204464:
1208 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1211 ; AVX2-LABEL: shuffle_v8i32_00204464:
1213 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1215 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1216 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [0,3,0,0] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1219 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1220 ; AVX1-LABEL: shuffle_v8i32_03004744:
1222 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1225 ; AVX2-LABEL: shuffle_v8i32_03004744:
1227 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1230 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [1,0,0,0] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1233 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1234 ; AVX1-LABEL: shuffle_v8i32_10005444:
1236 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1239 ; AVX2-LABEL: shuffle_v8i32_10005444:
1241 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1244 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [2,2,0,0] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1247 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1248 ; AVX1-LABEL: shuffle_v8i32_22006644:
1250 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1253 ; AVX2-LABEL: shuffle_v8i32_22006644:
1255 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1257 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1258 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [3,3,3,0] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1261 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1262 ; AVX1-LABEL: shuffle_v8i32_33307774:
1264 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1267 ; AVX2-LABEL: shuffle_v8i32_33307774:
1269 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1271 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1272 ret <8 x i32> %shuffle
; Reverses the four elements inside each 128-bit half of %a independently; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1275 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1276 ; AVX1-LABEL: shuffle_v8i32_32107654:
1278 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1281 ; AVX2-LABEL: shuffle_v8i32_32107654:
1283 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1285 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1286 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [0,0,2,3] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1289 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1290 ; AVX1-LABEL: shuffle_v8i32_00234467:
1292 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1295 ; AVX2-LABEL: shuffle_v8i32_00234467:
1297 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1299 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1300 ret <8 x i32> %shuffle
; Duplicates every even-indexed element of %a into the following odd lane. The AVX1 run expects the dedicated vmovsldup form, the AVX2 run a vpshufd.
1303 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1304 ; AVX1-LABEL: shuffle_v8i32_00224466:
1306 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1309 ; AVX2-LABEL: shuffle_v8i32_00224466:
1311 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1314 ret <8 x i32> %shuffle
; Swaps the two elements of every adjacent pair in %a; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1317 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1318 ; AVX1-LABEL: shuffle_v8i32_10325476:
1320 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1323 ; AVX2-LABEL: shuffle_v8i32_10325476:
1325 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1328 ret <8 x i32> %shuffle
; Duplicates every odd-indexed element of %a into the preceding even lane. The AVX1 run expects the dedicated vmovshdup form, the AVX2 run a vpshufd.
1331 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1332 ; AVX1-LABEL: shuffle_v8i32_11335577:
1334 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1337 ; AVX2-LABEL: shuffle_v8i32_11335577:
1339 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1341 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1342 ret <8 x i32> %shuffle
; Swaps only the first element pair of each 128-bit half of %a (pattern [1,0,2,3] in both halves); one immediate in-lane permute is expected.
1345 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1346 ; AVX1-LABEL: shuffle_v8i32_10235467:
1348 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1351 ; AVX2-LABEL: shuffle_v8i32_10235467:
1353 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1355 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1356 ret <8 x i32> %shuffle
; Single-input shuffle whose pattern [1,0,2,2] repeats in both 128-bit halves; one immediate in-lane permute is expected (vpermilps on AVX1, vpshufd on AVX2).
1359 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1360 ; AVX1-LABEL: shuffle_v8i32_10225466:
1362 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1365 ; AVX2-LABEL: shuffle_v8i32_10225466:
1367 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1369 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1370 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([0,0,0,1] low, [1,0,0,0] high). AVX1 still expects one vpermilps; AVX2 expects a vpermd with a constant index vector instead of vpshufd.
1373 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1374 ; AVX1-LABEL: shuffle_v8i32_00015444:
1376 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1379 ; AVX2-LABEL: shuffle_v8i32_00015444:
1381 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1382 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1384 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1385 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([0,0,2,0] low, [0,2,0,0] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1388 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1389 ; AVX1-LABEL: shuffle_v8i32_00204644:
1391 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1394 ; AVX2-LABEL: shuffle_v8i32_00204644:
1396 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1397 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1399 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1400 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([0,3,0,0] low, [0,0,3,0] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1403 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1404 ; AVX1-LABEL: shuffle_v8i32_03004474:
1406 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1409 ; AVX2-LABEL: shuffle_v8i32_03004474:
1411 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1412 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1414 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1415 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([1,0,0,0] low, splat of element 4 high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1418 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1419 ; AVX1-LABEL: shuffle_v8i32_10004444:
1421 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1424 ; AVX2-LABEL: shuffle_v8i32_10004444:
1426 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1427 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1429 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1430 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([2,2,0,0] low, [2,0,0,2] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1433 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1434 ; AVX1-LABEL: shuffle_v8i32_22006446:
1436 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1439 ; AVX2-LABEL: shuffle_v8i32_22006446:
1441 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1442 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1444 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1445 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([3,3,3,0] low, [3,0,3,0] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1448 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1449 ; AVX1-LABEL: shuffle_v8i32_33307474:
1451 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1454 ; AVX2-LABEL: shuffle_v8i32_33307474:
1456 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1457 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1459 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1460 ret <8 x i32> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half untouched. AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1463 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1464 ; AVX1-LABEL: shuffle_v8i32_32104567:
1466 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1469 ; AVX2-LABEL: shuffle_v8i32_32104567:
1471 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1472 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1474 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1475 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([0,0,2,3] low, [2,3,0,0] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1478 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1479 ; AVX1-LABEL: shuffle_v8i32_00236744:
1481 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1484 ; AVX2-LABEL: shuffle_v8i32_00236744:
1486 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1487 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1489 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1490 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different pattern per half ([0,0,2,2] low, [2,2,0,0] high). AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1493 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1494 ; AVX1-LABEL: shuffle_v8i32_00226644:
1496 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1499 ; AVX2-LABEL: shuffle_v8i32_00226644:
1501 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1502 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1504 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1505 ret <8 x i32> %shuffle
; Swaps adjacent element pairs in the low 128-bit half of %a and leaves the high half untouched. AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1508 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1509 ; AVX1-LABEL: shuffle_v8i32_10324567:
1511 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1514 ; AVX2-LABEL: shuffle_v8i32_10324567:
1516 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1517 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1519 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1520 ret <8 x i32> %shuffle
; Duplicates the odd elements within the low 128-bit half of %a ([1,1,3,3]) and leaves the high half untouched. AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1523 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1524 ; AVX1-LABEL: shuffle_v8i32_11334567:
1526 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1529 ; AVX2-LABEL: shuffle_v8i32_11334567:
1531 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1532 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1534 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1535 ret <8 x i32> %shuffle
; Leaves the low 128-bit half of %a untouched and swaps only elements 4 and 5 in the high half. AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1538 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1539 ; AVX1-LABEL: shuffle_v8i32_01235467:
1541 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1544 ; AVX2-LABEL: shuffle_v8i32_01235467:
1546 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1547 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1549 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1550 ret <8 x i32> %shuffle
; Leaves the low 128-bit half of %a untouched and applies [1,0,2,2] within the high half. AVX1 expects one vpermilps; AVX2 expects a vpermd with a constant index vector.
1553 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1554 ; AVX1-LABEL: shuffle_v8i32_01235466:
1556 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1559 ; AVX2-LABEL: shuffle_v8i32_01235466:
1561 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1562 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1564 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1565 ret <8 x i32> %shuffle
; In-lane single-input shuffle with undef result lanes 3 and 5 (the 'u' digits in the name). The expected output keeps those lanes as 'u' rather than filling them with a concrete index.
1568 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1569 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1571 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1574 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1576 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1577 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1579 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1580 ret <8 x i32> %shuffle
; In-lane single-input shuffle with undef result lanes 2, 3, 6 and 7; the defined lanes splat element 0 (low half) and element 6 (high half).
1583 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1584 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1586 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1589 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1591 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1592 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1594 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1595 ret <8 x i32> %shuffle
; Swaps adjacent pairs in the low half of %a, keeps elements 4 and 5 in place, and leaves result lanes 6 and 7 undef.
1598 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1599 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1601 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1604 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1606 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1607 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1609 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1610 ret <8 x i32> %shuffle
; Duplicates the odd elements in the low half of %a, keeps elements 6 and 7 in place, and leaves result lanes 4 and 5 undef.
1613 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1614 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1616 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1619 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1621 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1622 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1624 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1625 ret <8 x i32> %shuffle
; Sparse in-lane mask: lanes 0 and 3 keep their own elements, lanes 4 and 5 swap elements 4 and 5, and lanes 1, 2, 6 and 7 are undef.
1628 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1629 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1631 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1634 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1636 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1637 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1639 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1640 ret <8 x i32> %shuffle
; Mostly-undef in-lane mask: only lane 3 (element 3) and lanes 6-7 (both element 6) are defined.
1643 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1644 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1646 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1649 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1651 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1652 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1654 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1655 ret <8 x i32> %shuffle
; Irregular two-input mask (6,12,10,10,8,7,14,5) that crosses 128-bit halves for both %a and %b. Both expectations permute each input separately and then merge the two with a blend.
1658 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1659 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1661 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1662 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1664 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1665 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1666 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1669 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1671 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1672 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1673 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1674 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1676 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1677 ret <8 x i32> %shuffle
; The reversed low 128-bit half of %a, repeated in both halves of the result. AVX1 expects a 128-bit reverse plus vinsertf128; AVX2 expects one vpermd.
1680 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1681 ; AVX1-LABEL: shuffle_v8i32_32103210:
1683 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1684 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1687 ; AVX2-LABEL: shuffle_v8i32_32103210:
1689 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1690 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1692 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1693 ret <8 x i32> %shuffle
; The reversed high 128-bit half of %a, repeated in both halves of the result. AVX1 expects extract, 128-bit reverse and re-insert; AVX2 expects one vpermd.
1696 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1697 ; AVX1-LABEL: shuffle_v8i32_76547654:
1699 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1700 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1701 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1704 ; AVX2-LABEL: shuffle_v8i32_76547654:
1706 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1707 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1709 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1710 ret <8 x i32> %shuffle
; Full eight-element reversal of %a. AVX1 expects a 128-bit half swap followed by an in-lane reverse; AVX2 expects one vpermd.
1713 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1714 ; AVX1-LABEL: shuffle_v8i32_76543210:
1716 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1717 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1720 ; AVX2-LABEL: shuffle_v8i32_76543210:
1722 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1723 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1725 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1726 ret <8 x i32> %shuffle
; Reversed low half of %a in the low result half, reversed low half of %b in the high result half. Expected: insert %b's low 128 bits above %a's, then reverse within each half.
1729 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1730 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1732 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1733 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1736 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1738 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1739 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1741 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1742 ret <8 x i32> %shuffle
; Mask <3,2,1,0,15,14,13,12>: the low half of the result is the low half of %a
; reversed, the high half is the high half of %b reversed (indices 12..15
; select elements 4..7 of the second operand).
; Each source half already sits in the half it is needed in, so both check
; sets expect a blend (low half from ymm0, high half from ymm1) followed by
; one in-lane reversal.
1745 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1746 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1748 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1749 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1752 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1754 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1755 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1757 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1758 ret <8 x i32> %shuffle
; Mask <7,6,5,4,15,14,13,12>: the low half of the result is the high half of
; %a reversed, the high half is the high half of %b reversed.
; Both check sets expect a 128-bit permute that gathers the two high halves
; (ymm0 high into the low position, ymm1 high into the high position),
; followed by one in-lane reversal.
1761 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1762 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1764 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1765 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1768 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1770 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1771 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1773 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1774 ret <8 x i32> %shuffle
; Mask <15,14,13,12,7,6,5,4>: the low half of the result is the high half of
; %b reversed, the high half is the high half of %a reversed. This is the
; operand-swapped counterpart of the 7654fedc pattern.
; Both check sets expect a 128-bit permute that places the high half of ymm1
; low and the high half of ymm0 high, followed by one in-lane reversal.
1777 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1778 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1780 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1781 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1784 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1786 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1787 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1789 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1790 ret <8 x i32> %shuffle
; Mask <11,10,9,8,7,6,5,4>: the low half of the result is the low half of %b
; reversed, the high half is the high half of %a reversed.
; Each source half already sits in the half it is needed in, so both check
; sets expect a blend (low half from ymm1, high half from ymm0) followed by
; one in-lane reversal.
1793 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1794 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1796 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1797 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1800 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1802 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1803 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1805 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1806 ret <8 x i32> %shuffle
; Mask <11,10,9,8,3,2,1,0>: the low half of the result is the low half of %b
; reversed, the high half is the low half of %a reversed. This is the
; operand-swapped counterpart of the 3210ba98 pattern.
; The mask previously duplicated the ba987654 test (<11,10,9,8,7,6,5,4>), so
; the pattern named here was never exercised; it now matches the function name.
; Both check sets expect the low 128 bits of the first input (xmm0) to be
; inserted as the upper half of ymm1, followed by one in-lane reversal
; (vpermilps on AVX1, vpshufd on AVX2).
1809 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1810 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1812 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1813 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1816 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1818 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1819 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1821 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1822 ret <8 x i32> %shuffle
; Splat of a scalar float across all eight lanes: %1 is placed in lane 0 of an
; otherwise-undef <4 x float>, and the all-zero shuffle mask then replicates
; lane 0 into every lane of the <8 x float> result.
; NOTE(review): the definition of %1 is not among the lines shown here;
; presumably it is a load from %p, which is what the expected memory-operand
; vbroadcastss from (%rdi) implies - confirm against the full file.
; The expectation is shared by both run configurations.
1825 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1826 ; ALL-LABEL: splat_mem_v8f32_2:
1828 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1831 %2 = insertelement <4 x float> undef, float %1, i32 0
1832 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splat of element 0 of a 128-bit register argument across all eight lanes of
; a 256-bit result: the all-zero shuffle mask selects lane 0 of %r for every
; output lane.
; The AVX1 checks expect an in-register xmm splat (vpermilps) whose result is
; then inserted as the upper half (vinsertf128); the AVX2 checks expect a
; single register-source vbroadcastss.
1836 define <8 x float> @splat_v8f32(<4 x float> %r) {
1837 ; AVX1-LABEL: splat_v8f32:
1839 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1840 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1843 ; AVX2-LABEL: splat_v8f32:
1845 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1847 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1852 ; Shuffle to logical bit shifts
; Mask <8,0,undef,2,8,undef,8,6> against a zero second operand: in the function
; name "z" marks a lane taken from the zero vector (index 8), "U" an undef
; lane, and a digit the element of %a placed in that lane.
; Viewed as four 64-bit elements, each one has its low 32 bits zero or undef
; and its high 32 bits equal to the original low 32 bits, so the AVX2 checks
; expect a single 64-bit logical left shift by 32 (vpsllq).
; The AVX1 checks expect a zeroed register, an even-element duplicate
; (vmovsldup) and a blend that takes the zero lanes from the zeroed register.
1855 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1856 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1858 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1859 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1860 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1863 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1865 ; AVX2-NEXT: vpsllq $32, %ymm0
1867 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1868 ret <8 x i32> %shuffle
; Mask <1,undef,3,8,5,8,undef,undef> against a zero second operand: in the
; function name "z" marks a lane taken from the zero vector (index 8), "U" an
; undef lane, and a digit the element of %a placed in that lane.
; Viewed as four 64-bit elements, each one has its low 32 bits equal to the
; original high 32 bits and its high 32 bits zero or undef, so the AVX2 checks
; expect a single 64-bit logical right shift by 32 (vpsrlq).
; The AVX1 checks expect a zeroed register, an odd-element duplicate
; (vmovshdup) and a blend that takes the zero lanes from the zeroed register.
1871 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1872 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1874 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1875 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1876 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6,7]
1879 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1881 ; AVX2-NEXT: vpsrlq $32, %ymm0
1883 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1884 ret <8 x i32> %shuffle
; Concatenation of two loaded <2 x float> values, written as three shuffles:
; each loaded value is first widened to <8 x float> with undef upper lanes,
; then lanes 0,1 of the widened %tmp72 (loaded from %tmp64) and lanes 0,1 of
; the widened %tmp74 (loaded from %tmp65, mask indices 8,9) are combined into
; lanes 0..3 of the result; lanes 4..7 are undef.
; The shared expectation is a 64-bit load that zeroes the upper part of xmm0
; (vmovq) followed by a 64-bit load from (%rsi) into the high half of xmm0
; (vmovhpd).
1887 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1888 ; ALL-LABEL: concat_v2f32_1:
1889 ; ALL: # BB#0: # %entry
1890 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1891 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1894 %tmp74 = load <2 x float>* %tmp65, align 8
1895 %tmp72 = load <2 x float>* %tmp64, align 8
1896 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1897 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1898 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
1899 ret <8 x float> %tmp76
; Concatenation of two loaded <2 x float> values, written as a single shuffle:
; mask indices 0,1 take %tmp72 (loaded from %tmp64) and indices 2,3 take
; %tmp74 (loaded from %tmp65); result lanes 4..7 are undef.
; The shared expectation is the same two-instruction sequence as for the
; three-shuffle form of this concatenation: vmovq, then vmovhpd from (%rsi).
1902 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1903 ; ALL-LABEL: concat_v2f32_2:
1904 ; ALL: # BB#0: # %entry
1905 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1906 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1909 %tmp74 = load <2 x float>* %tmp65, align 8
1910 %tmp72 = load <2 x float>* %tmp64, align 8
1911 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1912 ret <8 x float> %tmp76
; Concatenation of two loaded <2 x float> values, written as two shuffles:
; %tmp72 (loaded from %tmp64) and %tmp74 (loaded from %tmp65) are first joined
; into a <4 x float>, which is then widened to <8 x float> with undef upper
; lanes.
; The shared expectation is the same two-instruction sequence as for the other
; forms of this concatenation: vmovq, then vmovhpd from (%rsi).
1915 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1916 ; ALL-LABEL: concat_v2f32_3:
1917 ; ALL: # BB#0: # %entry
1918 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1919 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1922 %tmp74 = load <2 x float>* %tmp65, align 8
1923 %tmp72 = load <2 x float>* %tmp64, align 8
1924 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1925 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1926 ret <8 x float> %res