; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; addpd256: fadd of two <4 x double> register arguments. With the AVX target
; selected on the llc command line this is expected to become a single packed
; double-precision add.
; CHECK: vaddpd
define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}
; addpd256fold: fadd of a <4 x double> argument with a constant vector. The
; pattern below requires the constant to be used directly as a RIP-relative
; constant-pool memory operand of the add.
; CHECK: vaddpd LCP{{.*}}(%rip)
define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %add.i
}
; addps256: fadd of two <8 x float> register arguments; expected to become a
; single packed single-precision add.
; CHECK: vaddps
define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
  %add.i = fadd <8 x float> %x, %y
  ret <8 x float> %add.i
}
; addps256fold: fadd of an <8 x float> argument with a constant vector. The
; pattern below requires the constant to be used directly as a RIP-relative
; constant-pool memory operand of the add.
; CHECK: vaddps LCP{{.*}}(%rip)
define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %add.i
}
; subpd256: fsub of two <4 x double> register arguments (%x - %y); expected to
; become a single packed double-precision subtract.
; CHECK: vsubpd
define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
  %sub.i = fsub <4 x double> %x, %y
  ret <4 x double> %sub.i
}
; subpd256fold: subtracts a <4 x double> loaded from %x (32-byte aligned load)
; from the register argument %y. The pattern requires the load to be folded
; into the subtract as a register-indirect memory operand.
; CHECK: vsubpd (%
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
  %tmp2 = load <4 x double>* %x, align 32
  %sub.i = fsub <4 x double> %y, %tmp2
  ret <4 x double> %sub.i
}
; subps256: fsub of two <8 x float> register arguments (%x - %y); expected to
; become a single packed single-precision subtract.
; CHECK: vsubps
define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
  %sub.i = fsub <8 x float> %x, %y
  ret <8 x float> %sub.i
}
; subps256fold: subtracts an <8 x float> loaded from %x (32-byte aligned load)
; from the register argument %y. The pattern requires the load to be folded
; into the subtract as a register-indirect memory operand.
; CHECK: vsubps (%
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
  %tmp2 = load <8 x float>* %x, align 32
  %sub.i = fsub <8 x float> %y, %tmp2
  ret <8 x float> %sub.i
}
; mulpd256: fmul of two <4 x double> register arguments; expected to become a
; single packed double-precision multiply.
; CHECK: vmulpd
define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
  %mul.i = fmul <4 x double> %x, %y
  ret <4 x double> %mul.i
}
; mulpd256fold: fmul of a <4 x double> argument with a constant vector. The
; pattern below requires the constant to be used directly as a RIP-relative
; constant-pool memory operand of the multiply.
; CHECK: vmulpd LCP{{.*}}(%rip)
define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %mul.i
}
; mulps256: fmul of two <8 x float> register arguments; expected to become a
; single packed single-precision multiply.
; CHECK: vmulps
define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
  %mul.i = fmul <8 x float> %x, %y
  ret <8 x float> %mul.i
}
; mulps256fold: fmul of an <8 x float> argument with a constant vector. The
; pattern below requires the constant to be used directly as a RIP-relative
; constant-pool memory operand of the multiply.
; CHECK: vmulps LCP{{.*}}(%rip)
define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %mul.i
}
; divpd256: fdiv of two <4 x double> register arguments (%x / %y); expected to
; become a single packed double-precision divide.
; CHECK: vdivpd
define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
  %div.i = fdiv <4 x double> %x, %y
  ret <4 x double> %div.i
}
; divpd256fold: fdiv of a <4 x double> argument by a constant vector. The
; pattern below requires the constant divisor to be used directly as a
; RIP-relative constant-pool memory operand of the divide.
; CHECK: vdivpd LCP{{.*}}(%rip)
define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %div.i
}
; divps256: fdiv of two <8 x float> register arguments (%x / %y); expected to
; become a single packed single-precision divide.
; CHECK: vdivps
define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
  %div.i = fdiv <8 x float> %x, %y
  ret <8 x float> %div.i
}
; divps256fold: fdiv of an <8 x float> argument by a constant vector. The
; pattern below requires the constant divisor to be used directly as a
; RIP-relative constant-pool memory operand of the divide.
; CHECK: vdivps LCP{{.*}}(%rip)
define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %div.i
}
; sqrtA: returns the result of a readnone tail call to @sqrtf on the float
; argument. The call is expected to be selected as the AVX scalar
; single-precision square-root instruction rather than a library call.
; CHECK: vsqrtss
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}
; External double-precision square root, called by @sqrtB. Declared readnone.
declare double @sqrt(double) readnone
; sqrtB: returns the result of a readnone tail call to @sqrt on the double
; argument. The call is expected to be selected as the AVX scalar
; double-precision square-root instruction rather than a library call.
; CHECK: vsqrtsd
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}
; External single-precision square root, called by @sqrtA. Declared readnone.
declare float @sqrtf(float) readnone
; vpaddq: add of two <4 x i64> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpaddq on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
; vpaddd: add of two <8 x i32> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpaddd on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
; vpaddw: add of two <16 x i16> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpaddw on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
; vpaddb: add of two <32 x i8> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpaddb on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
; vpsubq: sub of two <4 x i64> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpsubq on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
; vpsubd: sub of two <8 x i32> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpsubd on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
; vpsubw: sub of two <16 x i16> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpsubw on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
; vpsubb: sub of two <32 x i8> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpsubb on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
; vpmulld: mul of two <8 x i32> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpmulld on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}
; vpmullw: mul of two <16 x i16> vectors. The expected code splits the 256-bit
; operation into two 128-bit halves: extract the upper half of each operand,
; run vpmullw on xmm registers twice, then reinsert the upper half.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
; mul-v4i64: mul of two <4 x i64> vectors. The expected code extracts the
; upper 128-bit half of each operand and, for each half, builds the 64-bit
; products out of vpmuludq partial products combined with 32-bit shifts
; (vpsrlq/vpsllq $32) and vpaddq, then reinserts the upper half.
; Every line after the first is a "-NEXT" pattern, so the exact instruction
; order is pinned; any change in instruction ordering will fail this test.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}
; Declaration of the SSE sqrt.ss intrinsic, called by @int_sqrt_ss.
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
264 define <4 x float> @int_sqrt_ss() {
267 %x0 = load float addrspace(1)* undef, align 8
268 %x1 = insertelement <4 x float> undef, float %x0, i32 0
269 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind