1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
3 ; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
5 ; CHECK: vextractf128 $0
6 ; CHECK-NEXT: punpcklbw
7 ; CHECK-NEXT: punpckhbw
8 ; CHECK-NEXT: vinsertf128 $1
9 ; CHECK-NEXT: vpermilps $85
; funcA: byte splat across a 256-bit vector. All 32 mask indices of the
; shufflevector below are 5, so every lane of the result is element 5 of %a.
; The instruction sequence expected from llc is pinned by the FileCheck
; directives that precede this definition.
10 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only %a contributes to the result.
12 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13 ret <32 x i8> %shuffle
16 ; CHECK: vextractf128 $0
17 ; CHECK-NEXT: punpckhwd
18 ; CHECK-NEXT: vinsertf128 $1
19 ; CHECK-NEXT: vpermilps $85
; funcB: 16-bit element splat across a 256-bit vector. All 16 mask indices of
; the shufflevector below are 5, so every lane of the result is element 5 of
; %a. The instruction sequence expected from llc is pinned by the FileCheck
; directives that precede this definition.
20 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only %a contributes to the result.
22 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
23 ret <16 x i16> %shuffle
28 ; CHECK-NEXT: vinsertf128 $1
; funcC: builds a <4 x i64> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions starting from undef (one insert
; per lane, indices 0 through 3).
29 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
31 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
32 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
33 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
34 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
; After the fourth insert every lane has been written with %q.
35 ret <4 x i64> %vecinit6.i
39 ; CHECK-NEXT: vinsertf128 $1
; funcD: floating-point counterpart of the i64 splat above. Builds a
; <4 x double> whose four lanes all hold the scalar argument %q, using a chain
; of insertelement instructions starting from undef (indices 0 through 3).
40 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
42 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
43 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
44 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
45 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
; After the fourth insert every lane has been written with %q.
46 ret <4 x double> %vecinit6.i
49 ; Test this simple opt:
50 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
51 ; To:
52 ; shuffle (vload ptr), undef, <1, 1, 1, 1>
; funcE: reduced control-flow test case. A 32-bit value is loaded from a stack
; array, inserted into lanes 6 and 7 of an otherwise-undef <8 x i32>, and the
; result is merged with undef through a phi in the exit block. Both
; conditional branches use undef conditions, so the function only has to
; compile; it takes no arguments and nothing visible here uses the phi.
55 define void @funcE() nounwind {
; 18x18 float array on the stack, 32-byte aligned.
57 %udx495 = alloca [18 x [18 x float]], align 32
58 br label %for_test505.preheader

; Self-looping block with an undef exit condition.
60 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
61 br i1 undef, label %for_exit499, label %for_test505.preheader

; Either perform the load-and-insert below or skip straight to the merge block.
63 for_exit499: ; preds = %for_test505.preheader
64 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

; Take the address of element [1][1] of the array, reinterpret it as i32*,
; load 32 bits, and write that value into lanes 6 and 7 only; the remaining
; lanes of the vector stay undef.
66 load.i1247: ; preds = %for_exit499
67 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
68 %ptr.i1237 = bitcast float* %ptr1227 to i32*
69 %val.i1238 = load i32* %ptr.i1237, align 4
70 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
71 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
; Reinterpret the integer vector as <8 x float> for the phi below.
72 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
73 br label %__load_and_broadcast_32.exit1249

; Merge point: the vector built above, or undef when the load was skipped.
75 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
76 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
81 ; CHECK-NEXT: vinsertf128 $1
82 define <8 x float> @funcF(i32* %ptr) nounwind {
83 %val = load i32* %ptr, align 4
84 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
85 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
86 %tmp = bitcast <8 x i32> %ret7 to <8 x float>