; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
; funcA: splat byte element 5 of a <32 x i8> vector into all 32 lanes.
; Expected lowering (this test's llc invocation enables +avx): shuffle the
; low 128-bit half with vpshufb, then copy that half into the upper half of
; the ymm register with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}
; funcB: splat 16-bit element 5 of a <16 x i16> vector into all 16 lanes.
; Element 5 occupies bytes 10 and 11, which is why the expected vpshufb mask
; repeats the byte pair [10,11]; vinsertf128 then duplicates the shuffled
; low half into the upper half of the ymm register.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q.
; Expected lowering: move %rdi into xmm0 (vmovq), duplicate the low 64 bits
; within the xmm register (vmovddup), then duplicate the 128-bit half into
; the upper half of ymm0 (vinsertf128).
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
; funcD: build a <4 x double> whose four lanes all hold the scalar argument %q.
; The expected code starts directly with vmovddup on xmm0, followed by
; vinsertf128 to duplicate the low 128-bit half into the upper half of ymm0.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
; Test that this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE loads one 32-bit value from a 32-byte-aligned stack array and inserts
; it into lanes 6 and 7 of an otherwise-undef <8 x i32>. The expected code
; performs a single vbroadcastss straight from the stack slot. The expected
; frame setup (pushq/movq/andq $-32/subq) appears only in the block that
; performs the load.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       ## BB#0: ## %for_exit499
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ## implicit-def: %YMM0
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne LBB4_2
; CHECK-NEXT:  ## BB#1: ## %load.i1247
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  ; Address of element [1][1] of the stack array, reinterpreted as i32.
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  ; Only lanes 6 and 7 are defined; the remaining lanes stay undef.
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
; funcF: insert the scalar %val into lanes 6 and 7 of an otherwise-undef
; <8 x i32> and return it reinterpreted as <8 x float>.
; Expected lowering: vmovd the argument into xmm0, permute within the xmm
; register (vpermilps), then copy that half into the upper half of ymm0
; (vinsertf128), which is where lanes 6 and 7 live.
; The label and "## BB#0:" lines anchor the checks below to this function;
; without them the first NEXT directive would chain onto the previous
; function's last check.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}
; funcG: splat element 0 of a <8 x float> vector into all eight lanes.
; Element 0 is in the low 128-bit half, so the expected code permutes xmm0
; in place (vpermilps) and duplicates it into the upper half (vinsertf128).
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
; funcH: splat element 5 of a <8 x float> vector into all eight lanes.
; Element 5 is element 1 of the upper 128-bit half, so the expected code
; first extracts that half (vextractf128 $1), splats its element 1 with
; vpermilps, and then duplicates the result into the upper half of ymm0.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
; splat_load_2f64_11: load a <2 x double> from %ptr and splat its element 1.
; Expected lowering: a full vector load followed by vmovhlps, which copies
; the high element into both lanes.
; The "## BB#0:" line lets the following NEXT directives start at the first
; instruction instead of directly after the function label.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat its element 2.
; Element 2 is element 0 of the upper 128-bit half, so the expected code
; loads the whole vector, extracts the upper half (vextractf128 $1),
; duplicates its low double (vmovddup) and copies the result into the upper
; half of ymm0 (vinsertf128).
; The "## BB#0:" line lets the following NEXT directives start at the first
; instruction instead of directly after the function label.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm0
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat its element 0.
; The expected code folds the load into the shuffle: a single vpermilps with
; a memory source operand ("mem[0,0,0,0]").
; The "## BB#0:" line lets the following NEXT directive start at the first
; instruction instead of directly after the function label.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}
; splat_load_8f32_77777777: load a <8 x float> from %ptr and splat its
; element 7. Element 7 is element 3 of the upper 128-bit half, so the
; expected code loads the whole vector, extracts the upper half
; (vextractf128 $1), splats its element 3 (vpermilps) and copies the result
; into the upper half of ymm0 (vinsertf128).
; The "## BB#0:" line lets the following NEXT directives start at the first
; instruction instead of directly after the function label.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}