1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8 target triple = "x86_64-unknown-unknown"
10 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <0,0,0,1> over %a; %b is never selected
11 ; SSE-LABEL: shuffle_v4i32_0001:
13 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
16 ; AVX-LABEL: shuffle_v4i32_0001:
18 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
20 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
21 ret <4 x i32> %shuffle
23 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <0,0,2,0> over %a; %b is never selected
24 ; SSE-LABEL: shuffle_v4i32_0020:
26 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
29 ; AVX-LABEL: shuffle_v4i32_0020:
31 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
33 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
34 ret <4 x i32> %shuffle
36 define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <0,1,1,2> over %a; %b is never selected
37 ; SSE-LABEL: shuffle_v4i32_0112:
39 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
42 ; AVX-LABEL: shuffle_v4i32_0112:
44 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
46 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
47 ret <4 x i32> %shuffle
49 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <0,3,0,0> over %a; %b is never selected
50 ; SSE-LABEL: shuffle_v4i32_0300:
52 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
55 ; AVX-LABEL: shuffle_v4i32_0300:
57 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
59 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
60 ret <4 x i32> %shuffle
62 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <1,0,0,0> over %a; %b is never selected
63 ; SSE-LABEL: shuffle_v4i32_1000:
65 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
68 ; AVX-LABEL: shuffle_v4i32_1000:
70 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
72 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
73 ret <4 x i32> %shuffle
75 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <2,2,0,0> over %a; %b is never selected
76 ; SSE-LABEL: shuffle_v4i32_2200:
78 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
81 ; AVX-LABEL: shuffle_v4i32_2200:
83 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
85 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
86 ret <4 x i32> %shuffle
88 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <3,3,3,0> over %a; %b is never selected
89 ; SSE-LABEL: shuffle_v4i32_3330:
91 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
94 ; AVX-LABEL: shuffle_v4i32_3330:
96 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
98 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
99 ret <4 x i32> %shuffle
101 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <3,2,1,0> (lane reversal of %a); %b is never selected
102 ; SSE-LABEL: shuffle_v4i32_3210:
104 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
107 ; AVX-LABEL: shuffle_v4i32_3210:
109 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
111 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
112 ret <4 x i32> %shuffle
115 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <2,1,2,1> over %a; %b is never selected
116 ; SSE-LABEL: shuffle_v4i32_2121:
118 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
121 ; AVX-LABEL: shuffle_v4i32_2121:
123 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
125 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
126 ret <4 x i32> %shuffle
129 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <0,0,0,1> over %a; %b is never selected
130 ; SSE-LABEL: shuffle_v4f32_0001:
132 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
135 ; AVX-LABEL: shuffle_v4f32_0001:
137 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
139 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
140 ret <4 x float> %shuffle
142 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <0,0,2,0> over %a; %b is never selected
143 ; SSE-LABEL: shuffle_v4f32_0020:
145 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
148 ; AVX-LABEL: shuffle_v4f32_0020:
150 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
152 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
153 ret <4 x float> %shuffle
155 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <0,3,0,0> over %a; %b is never selected
156 ; SSE-LABEL: shuffle_v4f32_0300:
158 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
161 ; AVX-LABEL: shuffle_v4f32_0300:
163 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
165 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
166 ret <4 x float> %shuffle
168 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <1,0,0,0> over %a; %b is never selected
169 ; SSE-LABEL: shuffle_v4f32_1000:
171 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
174 ; AVX-LABEL: shuffle_v4f32_1000:
176 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
178 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
179 ret <4 x float> %shuffle
181 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <2,2,0,0> over %a; %b is never selected
182 ; SSE-LABEL: shuffle_v4f32_2200:
184 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
187 ; AVX-LABEL: shuffle_v4f32_2200:
189 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
191 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
192 ret <4 x float> %shuffle
194 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <3,3,3,0> over %a; %b is never selected
195 ; SSE-LABEL: shuffle_v4f32_3330:
197 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
200 ; AVX-LABEL: shuffle_v4f32_3330:
202 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
204 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
205 ret <4 x float> %shuffle
207 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <3,2,1,0> (lane reversal of %a); %b is never selected
208 ; SSE-LABEL: shuffle_v4f32_3210:
210 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
213 ; AVX-LABEL: shuffle_v4f32_3210:
215 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
217 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
218 ret <4 x float> %shuffle
220 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <0,0,1,1>; the checks expect a self-unpack of the low lanes
221 ; SSE-LABEL: shuffle_v4f32_0011:
223 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
226 ; AVX-LABEL: shuffle_v4f32_0011:
228 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
230 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
231 ret <4 x float> %shuffle
233 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <2,2,3,3>; the checks expect a self-unpack of the high lanes
234 ; SSE-LABEL: shuffle_v4f32_2233:
236 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
239 ; AVX-LABEL: shuffle_v4f32_2233:
241 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
243 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
244 ret <4 x float> %shuffle
246 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <0,0,2,2>; checks expect shufps on the baseline run and movsldup on every run with +sse3 or later
247 ; SSE2-LABEL: shuffle_v4f32_0022:
249 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
252 ; SSE3-LABEL: shuffle_v4f32_0022:
254 ; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
257 ; SSSE3-LABEL: shuffle_v4f32_0022:
259 ; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
262 ; SSE41-LABEL: shuffle_v4f32_0022:
264 ; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
267 ; AVX-LABEL: shuffle_v4f32_0022:
269 ; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
271 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
272 ret <4 x float> %shuffle
274 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) { ; one-input float shuffle, mask <1,1,3,3>; checks expect shufps on the baseline run and movshdup on every run with +sse3 or later
275 ; SSE2-LABEL: shuffle_v4f32_1133:
277 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
280 ; SSE3-LABEL: shuffle_v4f32_1133:
282 ; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
285 ; SSSE3-LABEL: shuffle_v4f32_1133:
287 ; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
290 ; SSE41-LABEL: shuffle_v4f32_1133:
292 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
295 ; AVX-LABEL: shuffle_v4f32_1133:
297 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
299 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
300 ret <4 x float> %shuffle
303 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <0,1,2,4>, i.e. a0,a1,a2,b0; two shufps before SSE4.1, a single insertps from SSE4.1 on
304 ; SSE2-LABEL: shuffle_v4i32_0124:
306 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
307 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
310 ; SSE3-LABEL: shuffle_v4i32_0124:
312 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
313 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
316 ; SSSE3-LABEL: shuffle_v4i32_0124:
318 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
319 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
322 ; SSE41-LABEL: shuffle_v4i32_0124:
324 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
327 ; AVX-LABEL: shuffle_v4i32_0124:
329 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
331 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
332 ret <4 x i32> %shuffle
334 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <0,1,4,2>, i.e. a0,a1,b0,a2
335 ; SSE-LABEL: shuffle_v4i32_0142:
337 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
338 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
341 ; AVX-LABEL: shuffle_v4i32_0142:
343 ; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
344 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
346 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
347 ret <4 x i32> %shuffle
349 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <0,4,1,2>, i.e. a0,b0,a1,a2
350 ; SSE-LABEL: shuffle_v4i32_0412:
352 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
353 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
354 ; SSE-NEXT: movaps %xmm1, %xmm0
357 ; AVX-LABEL: shuffle_v4i32_0412:
359 ; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
360 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[1,2]
362 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
363 ret <4 x i32> %shuffle
365 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <4,0,1,2>, i.e. b0,a0,a1,a2
366 ; SSE-LABEL: shuffle_v4i32_4012:
368 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
369 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
370 ; SSE-NEXT: movaps %xmm1, %xmm0
373 ; AVX-LABEL: shuffle_v4i32_4012:
375 ; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
376 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,2]
378 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
379 ret <4 x i32> %shuffle
381 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <0,1,4,5>, i.e. low 64 bits of %a followed by low 64 bits of %b
382 ; SSE-LABEL: shuffle_v4i32_0145:
384 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
387 ; AVX-LABEL: shuffle_v4i32_0145:
389 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
391 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
392 ret <4 x i32> %shuffle
394 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <0,4,5,1>, i.e. a0,b0,b1,a1
395 ; SSE-LABEL: shuffle_v4i32_0451:
397 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
398 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
401 ; AVX-LABEL: shuffle_v4i32_0451:
403 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
404 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
406 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
407 ret <4 x i32> %shuffle
409 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <4,5,0,1>, i.e. low 64 bits of %b followed by low 64 bits of %a
410 ; SSE-LABEL: shuffle_v4i32_4501:
412 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
413 ; SSE-NEXT: movdqa %xmm1, %xmm0
416 ; AVX-LABEL: shuffle_v4i32_4501:
418 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
420 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
421 ret <4 x i32> %shuffle
423 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <4,0,1,5>, i.e. b0,a0,a1,b1
424 ; SSE-LABEL: shuffle_v4i32_4015:
426 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
427 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
430 ; AVX-LABEL: shuffle_v4i32_4015:
432 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
433 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
435 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
436 ret <4 x i32> %shuffle
439 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { ; mask <4,1,2,3> with zeroinitializer as first operand, i.e. a0,0,0,0; shufps pair before SSE4.1, blendps with a zeroed register from SSE4.1 on
440 ; SSE2-LABEL: shuffle_v4f32_4zzz:
442 ; SSE2-NEXT: xorps %xmm1, %xmm1
443 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
444 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
447 ; SSE3-LABEL: shuffle_v4f32_4zzz:
449 ; SSE3-NEXT: xorps %xmm1, %xmm1
450 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
451 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
454 ; SSSE3-LABEL: shuffle_v4f32_4zzz:
456 ; SSSE3-NEXT: xorps %xmm1, %xmm1
457 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
458 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
461 ; SSE41-LABEL: shuffle_v4f32_4zzz:
463 ; SSE41-NEXT: xorps %xmm1, %xmm1
464 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
465 ; SSE41-NEXT: movaps %xmm1, %xmm0
468 ; AVX-LABEL: shuffle_v4f32_4zzz:
470 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
471 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
473 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
474 ret <4 x float> %shuffle
477 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) { ; mask <2,4,3,0> with zeroinitializer as first operand, i.e. 0,a0,0,0; shufps pair before SSE4.1, a zeroing insertps from SSE4.1 on
478 ; SSE2-LABEL: shuffle_v4f32_z4zz:
480 ; SSE2-NEXT: xorps %xmm1, %xmm1
481 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
482 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
485 ; SSE3-LABEL: shuffle_v4f32_z4zz:
487 ; SSE3-NEXT: xorps %xmm1, %xmm1
488 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
489 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
492 ; SSSE3-LABEL: shuffle_v4f32_z4zz:
494 ; SSSE3-NEXT: xorps %xmm1, %xmm1
495 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
496 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
499 ; SSE41-LABEL: shuffle_v4f32_z4zz:
501 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
504 ; AVX-LABEL: shuffle_v4f32_z4zz:
506 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
508 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
509 ret <4 x float> %shuffle
512 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) { ; mask <0,0,4,0> with zeroinitializer as first operand, i.e. 0,0,a0,0; shufps pair before SSE4.1, a zeroing insertps from SSE4.1 on
513 ; SSE2-LABEL: shuffle_v4f32_zz4z:
515 ; SSE2-NEXT: xorps %xmm1, %xmm1
516 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
517 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
518 ; SSE2-NEXT: movaps %xmm1, %xmm0
521 ; SSE3-LABEL: shuffle_v4f32_zz4z:
523 ; SSE3-NEXT: xorps %xmm1, %xmm1
524 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
525 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
526 ; SSE3-NEXT: movaps %xmm1, %xmm0
529 ; SSSE3-LABEL: shuffle_v4f32_zz4z:
531 ; SSSE3-NEXT: xorps %xmm1, %xmm1
532 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
533 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
534 ; SSSE3-NEXT: movaps %xmm1, %xmm0
537 ; SSE41-LABEL: shuffle_v4f32_zz4z:
539 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
542 ; AVX-LABEL: shuffle_v4f32_zz4z:
544 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
546 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
547 ret <4 x float> %shuffle
550 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) { ; mask <0,undef,undef,4> with zeroinitializer as first operand, i.e. 0,undef,undef,a0; the checks show the undef lanes emitted as zero
551 ; SSE2-LABEL: shuffle_v4f32_zuu4:
553 ; SSE2-NEXT: xorps %xmm1, %xmm1
554 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
555 ; SSE2-NEXT: movaps %xmm1, %xmm0
558 ; SSE3-LABEL: shuffle_v4f32_zuu4:
560 ; SSE3-NEXT: xorps %xmm1, %xmm1
561 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
562 ; SSE3-NEXT: movaps %xmm1, %xmm0
565 ; SSSE3-LABEL: shuffle_v4f32_zuu4:
567 ; SSSE3-NEXT: xorps %xmm1, %xmm1
568 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
569 ; SSSE3-NEXT: movaps %xmm1, %xmm0
572 ; SSE41-LABEL: shuffle_v4f32_zuu4:
574 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
577 ; AVX-LABEL: shuffle_v4f32_zuu4:
579 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
581 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
582 ret <4 x float> %shuffle
585 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { ; mask <0,1,2,7> with zeroinitializer as first operand, i.e. 0,0,0,a3; shufps pair before SSE4.1, blendps with a zeroed register from SSE4.1 on
586 ; SSE2-LABEL: shuffle_v4f32_zzz7:
588 ; SSE2-NEXT: xorps %xmm1, %xmm1
589 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
590 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
591 ; SSE2-NEXT: movaps %xmm1, %xmm0
594 ; SSE3-LABEL: shuffle_v4f32_zzz7:
596 ; SSE3-NEXT: xorps %xmm1, %xmm1
597 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
598 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
599 ; SSE3-NEXT: movaps %xmm1, %xmm0
602 ; SSSE3-LABEL: shuffle_v4f32_zzz7:
604 ; SSSE3-NEXT: xorps %xmm1, %xmm1
605 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
606 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
607 ; SSSE3-NEXT: movaps %xmm1, %xmm0
610 ; SSE41-LABEL: shuffle_v4f32_zzz7:
612 ; SSE41-NEXT: xorps %xmm1, %xmm1
613 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3]
614 ; SSE41-NEXT: movaps %xmm1, %xmm0
617 ; AVX-LABEL: shuffle_v4f32_zzz7:
619 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
620 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
622 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
623 ret <4 x float> %shuffle
626 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) { ; mask <0,6,2,3> with zeroinitializer as first operand, i.e. 0,a2,0,0; shufps pair before SSE4.1, a zeroing insertps from SSE4.1 on
627 ; SSE2-LABEL: shuffle_v4f32_z6zz:
629 ; SSE2-NEXT: xorps %xmm1, %xmm1
630 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
631 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
634 ; SSE3-LABEL: shuffle_v4f32_z6zz:
636 ; SSE3-NEXT: xorps %xmm1, %xmm1
637 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
638 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
641 ; SSSE3-LABEL: shuffle_v4f32_z6zz:
643 ; SSSE3-NEXT: xorps %xmm1, %xmm1
644 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
645 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
648 ; SSE41-LABEL: shuffle_v4f32_z6zz:
650 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
653 ; AVX-LABEL: shuffle_v4f32_z6zz:
655 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
657 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
658 ret <4 x float> %shuffle
661 define <4 x i32> @shuffle_v4i32_4zzz(i32 %i) { ; scalar %i placed in lane 0 of an undef vector, then mask <4,1,2,3> against zero, i.e. i,0,0,0; the checks expect a lone movd
662 ; SSE-LABEL: shuffle_v4i32_4zzz:
664 ; SSE-NEXT: movd %edi, %xmm0
667 ; AVX-LABEL: shuffle_v4i32_4zzz:
669 ; AVX-NEXT: vmovd %edi, %xmm0
671 %a = insertelement <4 x i32> undef, i32 %i, i32 0
672 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
673 ret <4 x i32> %shuffle
676 define <4 x i32> @shuffle_v4i32_z4zz(i32 %i) { ; scalar %i placed in lane 0 of an undef vector, then mask <2,4,3,0> against zero, i.e. 0,i,0,0; the checks expect movd followed by pshufd
677 ; SSE-LABEL: shuffle_v4i32_z4zz:
679 ; SSE-NEXT: movd %edi, %xmm0
680 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
683 ; AVX-LABEL: shuffle_v4i32_z4zz:
685 ; AVX-NEXT: vmovd %edi, %xmm0
686 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
688 %a = insertelement <4 x i32> undef, i32 %i, i32 0
689 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
690 ret <4 x i32> %shuffle
693 define <4 x i32> @shuffle_v4i32_zz4z(i32 %i) { ; scalar %i placed in lane 0 of an undef vector, then mask <0,0,4,0> against zero, i.e. 0,0,i,0; the checks expect movd followed by pshufd
694 ; SSE-LABEL: shuffle_v4i32_zz4z:
696 ; SSE-NEXT: movd %edi, %xmm0
697 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
700 ; AVX-LABEL: shuffle_v4i32_zz4z:
702 ; AVX-NEXT: vmovd %edi, %xmm0
703 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
705 %a = insertelement <4 x i32> undef, i32 %i, i32 0
706 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
707 ret <4 x i32> %shuffle
710 define <4 x i32> @shuffle_v4i32_zuu4(i32 %i) { ; scalar %i placed in lane 0 of an undef vector, then mask <0,undef,undef,4> against zero, i.e. 0,undef,undef,i
711 ; SSE-LABEL: shuffle_v4i32_zuu4:
713 ; SSE-NEXT: movd %edi, %xmm0
714 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
717 ; AVX-LABEL: shuffle_v4i32_zuu4:
719 ; AVX-NEXT: vmovd %edi, %xmm0
720 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
722 %a = insertelement <4 x i32> undef, i32 %i, i32 0
723 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
724 ret <4 x i32> %shuffle
727 define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) { ; scalar %i placed in lane 2 (not lane 0) of an undef vector, then mask <0,6,2,3> against zero, i.e. 0,i,0,0
728 ; SSE-LABEL: shuffle_v4i32_z6zz:
730 ; SSE-NEXT: movd %edi, %xmm0
731 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
734 ; AVX-LABEL: shuffle_v4i32_z6zz:
736 ; AVX-NEXT: vmovd %edi, %xmm0
737 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
739 %a = insertelement <4 x i32> undef, i32 %i, i32 2
740 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
741 ret <4 x i32> %shuffle
744 define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <7,0,1,2>, i.e. b3,a0,a1,a2; shufps pair without SSSE3, a single palignr once SSSE3 is available
745 ; SSE2-LABEL: shuffle_v4i32_7012:
747 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
748 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
749 ; SSE2-NEXT: movaps %xmm1, %xmm0
752 ; SSE3-LABEL: shuffle_v4i32_7012:
754 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
755 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
756 ; SSE3-NEXT: movaps %xmm1, %xmm0
759 ; SSSE3-LABEL: shuffle_v4i32_7012:
761 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
764 ; SSE41-LABEL: shuffle_v4i32_7012:
766 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
769 ; AVX-LABEL: shuffle_v4i32_7012:
771 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
773 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
774 ret <4 x i32> %shuffle
777 define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <6,7,0,1>, i.e. b2,b3,a0,a1; shufpd without SSSE3, palignr once SSSE3 is available
778 ; SSE2-LABEL: shuffle_v4i32_6701:
780 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
781 ; SSE2-NEXT: movapd %xmm1, %xmm0
784 ; SSE3-LABEL: shuffle_v4i32_6701:
786 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
787 ; SSE3-NEXT: movapd %xmm1, %xmm0
790 ; SSSE3-LABEL: shuffle_v4i32_6701:
792 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
795 ; SSE41-LABEL: shuffle_v4i32_6701:
797 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
800 ; AVX-LABEL: shuffle_v4i32_6701:
802 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
804 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
805 ret <4 x i32> %shuffle
808 define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <5,6,7,0>, i.e. b1,b2,b3,a0; shufps pair without SSSE3, palignr once SSSE3 is available
809 ; SSE2-LABEL: shuffle_v4i32_5670:
811 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
812 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
813 ; SSE2-NEXT: movaps %xmm1, %xmm0
816 ; SSE3-LABEL: shuffle_v4i32_5670:
818 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
819 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
820 ; SSE3-NEXT: movaps %xmm1, %xmm0
823 ; SSSE3-LABEL: shuffle_v4i32_5670:
825 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
828 ; SSE41-LABEL: shuffle_v4i32_5670:
830 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
833 ; AVX-LABEL: shuffle_v4i32_5670:
835 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
837 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
838 ret <4 x i32> %shuffle
841 define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <1,2,3,4>, i.e. a1,a2,a3,b0; shufps pair without SSSE3, palignr once SSSE3 is available
842 ; SSE2-LABEL: shuffle_v4i32_1234:
844 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
845 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
848 ; SSE3-LABEL: shuffle_v4i32_1234:
850 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
851 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
854 ; SSSE3-LABEL: shuffle_v4i32_1234:
856 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
857 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
860 ; SSE41-LABEL: shuffle_v4i32_1234:
862 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
863 ; SSE41-NEXT: movdqa %xmm1, %xmm0
866 ; AVX-LABEL: shuffle_v4i32_1234:
868 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
870 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
871 ret <4 x i32> %shuffle
874 define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <2,3,4,5>, i.e. a2,a3,b0,b1; shufpd without SSSE3, palignr once SSSE3 is available
875 ; SSE2-LABEL: shuffle_v4i32_2345:
877 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
880 ; SSE3-LABEL: shuffle_v4i32_2345:
882 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
885 ; SSSE3-LABEL: shuffle_v4i32_2345:
887 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
888 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
891 ; SSE41-LABEL: shuffle_v4i32_2345:
893 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
894 ; SSE41-NEXT: movdqa %xmm1, %xmm0
897 ; AVX-LABEL: shuffle_v4i32_2345:
899 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
901 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
902 ret <4 x i32> %shuffle
905 define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) { ; two-input i32 shuffle, mask <3,4,5,6>, i.e. a3,b0,b1,b2; shufps pair without SSSE3, palignr once SSSE3 is available
906 ; SSE2-LABEL: shuffle_v4i32_3456:
908 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
909 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
912 ; SSE3-LABEL: shuffle_v4i32_3456:
914 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
915 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
918 ; SSSE3-LABEL: shuffle_v4i32_3456:
920 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
921 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
924 ; SSE41-LABEL: shuffle_v4i32_3456:
926 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
927 ; SSE41-NEXT: movdqa %xmm1, %xmm0
930 ; AVX-LABEL: shuffle_v4i32_3456:
932 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
934 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
935 ret <4 x i32> %shuffle
938 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) { ; one-input i32 shuffle, mask <0,undef,1,undef>; the checks show the undef lanes filled as [0,0,1,1]
939 ; SSE-LABEL: shuffle_v4i32_0u1u:
941 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
944 ; AVX-LABEL: shuffle_v4i32_0u1u:
946 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
948 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
949 ret <4 x i32> %shuffle
952 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { ; mask <0,5,1,7> with zeroinitializer as second operand, i.e. a0,0,a1,0; pxor plus punpckldq before SSE4.1, pmovzxdq from SSE4.1 on
953 ; SSE2-LABEL: shuffle_v4i32_0z1z:
955 ; SSE2-NEXT: pxor %xmm1, %xmm1
956 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
959 ; SSE3-LABEL: shuffle_v4i32_0z1z:
961 ; SSE3-NEXT: pxor %xmm1, %xmm1
962 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
965 ; SSSE3-LABEL: shuffle_v4i32_0z1z:
967 ; SSSE3-NEXT: pxor %xmm1, %xmm1
968 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
971 ; SSE41-LABEL: shuffle_v4i32_0z1z:
973 ; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
976 ; AVX-LABEL: shuffle_v4i32_0z1z:
978 ; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
980 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
981 ret <4 x i32> %shuffle
984 define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) { ; scalar register argument %a in lane 0, lanes 1-3 taken from zeroinitializer; the checks expect a lone movd
985 ; SSE-LABEL: insert_reg_and_zero_v4i32:
987 ; SSE-NEXT: movd %edi, %xmm0
990 ; AVX-LABEL: insert_reg_and_zero_v4i32:
992 ; AVX-NEXT: vmovd %edi, %xmm0
994 %v = insertelement <4 x i32> undef, i32 %a, i32 0
995 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
996 ret <4 x i32> %shuffle
999 define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { ; i32 loaded from %ptr goes in lane 0, lanes 1-3 come from zeroinitializer; the checks expect the load folded into movd
1000 ; SSE-LABEL: insert_mem_and_zero_v4i32:
1002 ; SSE-NEXT: movd (%rdi), %xmm0
1005 ; AVX-LABEL: insert_mem_and_zero_v4i32:
1007 ; AVX-NEXT: vmovd (%rdi), %xmm0
1009 %a = load i32* %ptr ; defines %a from memory; without this load %a is undefined and the (%rdi) operand in the checks has no source
1010 %v = insertelement <4 x i32> undef, i32 %a, i32 0
1011 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1012 ret <4 x i32> %shuffle
1015 define <4 x float> @insert_reg_and_zero_v4f32(float %a) { ; scalar float argument %a in lane 0, lanes 1-3 taken from zeroinitializer; the checks expect a zeroed register merged via movss
1016 ; SSE-LABEL: insert_reg_and_zero_v4f32:
1018 ; SSE-NEXT: xorps %xmm1, %xmm1
1019 ; SSE-NEXT: movss %xmm0, %xmm1
1020 ; SSE-NEXT: movaps %xmm1, %xmm0
1023 ; AVX-LABEL: insert_reg_and_zero_v4f32:
1025 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1026 ; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
1028 %v = insertelement <4 x float> undef, float %a, i32 0
1029 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1030 ret <4 x float> %shuffle
1033 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { ; float loaded from %ptr goes in lane 0, lanes 1-3 come from zeroinitializer; the checks expect the load folded into movss
1034 ; SSE-LABEL: insert_mem_and_zero_v4f32:
1036 ; SSE-NEXT: movss (%rdi), %xmm0
1039 ; AVX-LABEL: insert_mem_and_zero_v4f32:
1041 ; AVX-NEXT: vmovss (%rdi), %xmm0
1043 %a = load float* %ptr
1044 %v = insertelement <4 x float> undef, float %a, i32 0
1045 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1046 ret <4 x float> %shuffle
1049 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) { ; lane reversal (mask <3,2,1,0>) of a vector loaded from %ptr; the AVX checks expect the load folded into vpermilps, the SSE checks a separate movaps
1050 ; SSE-LABEL: shuffle_mem_v4f32_3210:
1052 ; SSE-NEXT: movaps (%rdi), %xmm0
1053 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1056 ; AVX-LABEL: shuffle_mem_v4f32_3210:
1058 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
1060 %a = load <4 x float>* %ptr
1061 %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1062 ret <4 x float> %shuffle