1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
4 ; CHECK: vpunpcklbw %xmm
5 ; CHECK-NEXT: vpunpckhbw %xmm
6 ; CHECK-NEXT: vinsertf128 $1
7 ; CHECK-NEXT: vpermilps $85
; funcA: byte splat on a 256-bit vector.
; The shuffle mask is 32 copies of index 5, so every lane of the result is
; element 5 of %a. The second shuffle operand is undef and no mask index
; refers to it.
8 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
10 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Return the broadcast vector.
11 ret <32 x i8> %shuffle
14 ; CHECK: vpunpckhwd %xmm
15 ; CHECK-NEXT: vinsertf128 $1
16 ; CHECK-NEXT: vpermilps $85
; funcB: 16-bit element splat on a 256-bit vector.
; The shuffle mask is 16 copies of index 5, so every lane of the result is
; element 5 of %a. The second shuffle operand is undef and no mask index
; refers to it.
17 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
19 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Return the broadcast vector.
20 ret <16 x i16> %shuffle
24 ; CHECK-NEXT: vmovlhps %xmm
25 ; CHECK-NEXT: vinsertf128 $1
; funcC: splat of a scalar i64 argument into a <4 x i64>.
; The vector is built one lane at a time: %q is inserted into lanes 0, 1, 2
; and 3 of an initially undef vector, so all four lanes hold the same value.
26 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; Lane 0 (starting from undef).
28 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
; Lane 1.
29 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
; Lane 2.
30 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
; Lane 3; the vector is now fully defined.
31 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
32 ret <4 x i64> %vecinit6.i
36 ; CHECK-NEXT: vinsertf128 $1
; funcD: splat of a scalar double argument into a <4 x double>.
; Same construction as an element-by-element build: %q is inserted into
; lanes 0, 1, 2 and 3 of an initially undef vector.
37 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; Lane 0 (starting from undef).
39 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
; Lane 1.
40 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
; Lane 2.
41 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
; Lane 3; the vector is now fully defined.
42 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
43 ret <4 x double> %vecinit6.i
46 ; Test this simple opt:
47 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
49 ; shuffle (vload ptr), undef, <1, 1, 1, 1>
51 ; CHECK-NEXT: vinsertf128 $1
52 ; CHECK-NEXT: vpermilps $-1
; funcE: load a 32-bit value from a stack array and place it in the two
; highest lanes of an 8-lane vector, merged with undef on the path that skips
; the load. Takes no arguments and returns <8 x float>.
53 define <8 x float> @funcE() nounwind {
; Stack storage: an 18x18 array of float, 32-byte aligned.
55 %udx495 = alloca [18 x [18 x float]], align 32
56 br label %for_test505.preheader
; Self-looping block; the branch condition is undef.
58 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
59 br i1 undef, label %for_exit499, label %for_test505.preheader
; Either skip the load entirely or go through load.i1247; the condition is
; undef here as well.
61 for_exit499: ; preds = %for_test505.preheader
62 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
64 load.i1247: ; preds = %for_exit499
; Address of element [1][1] of the stack array.
65 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
; Reinterpret the float slot as i32 and load it as an integer.
66 %ptr.i1237 = bitcast float* %ptr1227 to i32*
67 %val.i1238 = load i32* %ptr.i1237, align 4
; Only lanes 6 and 7 receive the loaded value; lanes 0-5 stay undef.
68 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
69 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
; View the integer vector as <8 x float> without changing any bits.
70 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
71 br label %__load_and_broadcast_32.exit1249
73 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
; Result is the loaded vector when coming from load.i1247, undef otherwise.
74 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
75 ret <8 x float> %load_broadcast12281250
78 ; CHECK: vinsertf128 $1
79 ; CHECK-NEXT: vpermilps $0
; funcF: place a scalar i32 argument in the two highest lanes of an 8-lane
; vector and reinterpret the result as <8 x float>.
80 define <8 x float> @funcF(i32 %val) nounwind {
; Only lanes 6 and 7 receive %val; lanes 0-5 stay undef.
81 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
82 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
; View the integer vector as <8 x float> without changing any bits.
83 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
87 ; CHECK: vinsertf128 $1
88 ; CHECK-NEXT: vpermilps $0
; funcG: float splat of the lowest element of a 256-bit vector.
; The shuffle mask is 8 copies of index 0, so every lane of the result is
; element 0 of %a. The second shuffle operand is undef and no mask index
; refers to it.
89 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
91 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Return the broadcast vector.
92 ret <8 x float> %shuffle
95 ; CHECK: vextractf128 $1
96 ; CHECK-NEXT: vinsertf128 $1
97 ; CHECK-NEXT: vpermilps $85
; funcH: float splat of element 5 of a 256-bit vector.
; The shuffle mask is 8 copies of index 5, so every lane of the result is
; element 5 of %a. Index 5 lies in the upper four elements of the 8-element
; source, and the FileCheck expectations preceding this function begin with
; a vextractf128 $1.
98 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
100 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Return the broadcast vector.
101 ret <8 x float> %shuffle