; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; funcA: splat element 5 of a <32 x i8> vector into all 32 lanes.
; Expected lowering per the directives below: a 128-bit vpshufb that replicates
; byte 5 of xmm0, immediately followed by a vinsertf128 with immediate 1.
; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
entry:
  ; Every mask index is 5, so each result lane reads %a[5]; the second
  ; operand is undef and is never selected.
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ; The function previously had no terminator or closing brace; %shuffle is
  ; the only value of the declared return type.
  ret <32 x i8> %shuffle
}
; funcB: splat element 5 of a <16 x i16> vector into all 16 lanes.
; Element 5 of an i16 vector occupies bytes 10 and 11, which is why the
; expected vpshufb mask below repeats the byte pair 10,11. The shuffle must be
; immediately followed by a vinsertf128 with immediate 1.
; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
  ; All-5 mask: every result lane reads %a[5]; the undef operand is unused.
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q.
; Expected lowering per the directives below: a vmovddup on an xmm register
; immediately followed by a vinsertf128 with immediate 1.
;
; NOTE(review): the first expected-output line used the "-NEXT" form with no
; anchoring directive before it in this group, which made it chain onto the
; last match of the previous function. A label anchor was added and the first
; match relaxed to a plain one; confirm the full expected sequence against
; current llc output and tighten again if an earlier instruction should be
; matched first.
; CHECK-LABEL: funcC:
; CHECK: vmovddup %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
  ; Insert %q into lanes 0..3 one at a time, starting from undef.
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
; funcD: build a <4 x double> whose four lanes all hold the scalar argument %q.
; Expected lowering per the directives below: a vmovddup on an xmm register
; immediately followed by a vinsertf128 with immediate 1.
; CHECK: vmovddup %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
  ; Insert %q into lanes 0..3 one at a time, starting from undef.
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE: loads one 32-bit value from a stack array and inserts it into lanes 6
; and 7 of an otherwise-undef <8 x i32>, then returns it as <8 x float> through
; a phi whose other incoming value is undef.
;
; NOTE(review): no expected-output directives for this function are visible
; here, so nothing currently verifies the broadcast described above. Add the
; appropriate match lines after confirming the instruction llc emits.
define <8 x float> @funcE() nounwind {
allocas:
  ; Entry block is labeled to match the "%allocas" name used in the preds
  ; comment on the loop header below.
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  ; Self-loop with an undef condition.
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  ; Either skip the load (the phi then receives undef) or perform it.
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  ; Address of element [1][1] of the 18x18 float array, reinterpreted as i32*
  ; so the value is loaded as an integer.
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  ; Only lanes 6 and 7 are defined; the remaining lanes stay undef.
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
; funcF: insert the scalar argument %val into lanes 6 and 7 of an otherwise
; undef <8 x i32> and return the result reinterpreted as <8 x float>.
; Expected lowering per the directive below includes a vinsertf128 with
; immediate 1.
;
; NOTE(review): the only visible expected-output line used the "-NEXT" form
; with no anchoring directive before it, which made it chain onto whatever the
; previous function matched last. A label anchor was added and the match
; relaxed to a plain one; confirm the full expected sequence against current
; llc output and tighten again if earlier instructions should be matched.
; CHECK-LABEL: funcF:
; CHECK: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
  ; Only lanes 6 and 7 are defined; the remaining lanes stay undef.
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ; The function previously had no terminator or closing brace; %tmp is the
  ; only value of the declared return type.
  ret <8 x float> %tmp
}
; funcG: splat element 0 of a <8 x float> vector into all 8 lanes.
; Expected lowering per the directive below includes a vinsertf128 with
; immediate 1.
;
; NOTE(review): the only visible expected-output line used the "-NEXT" form
; with no anchoring directive before it, which made it chain onto whatever the
; previous function matched last. A label anchor was added and the match
; relaxed to a plain one; confirm the full expected sequence against current
; llc output and tighten again if earlier instructions should be matched.
; CHECK-LABEL: funcG:
; CHECK: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  ; All-zero mask: every result lane reads %a[0]; the undef operand is unused.
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
; funcH: splat element 5 of a <8 x float> vector into all 8 lanes.
; Element 5 is element 1 of the upper 128-bit half, so the expected sequence
; below extracts that half (vextractf128 $1), permutes within it
; (vpermilps $85; 85 = 0b01010101, i.e. index 1 in each of the four 2-bit
; selector fields), and reinserts it (vinsertf128 $1). The three instructions
; must appear on consecutive lines.
; CHECK: vextractf128 $1
; CHECK-NEXT: vpermilps $85
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  ; All-5 mask: every result lane reads %a[5]; the undef operand is unused.
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}