; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
; Lane-wise fadd of two <8 x double> arguments (register-register form).
; CHECK-LABEL: addpd512
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}
; fadd of %y with a non-splat constant vector. The directive below expects the
; constant to be consumed by vaddpd directly from a RIP-relative constant-pool
; label (LCP...).
; CHECK-LABEL: addpd512fold
; CHECK: vaddpd LCP{{.*}}(%rip)
define <8 x double> @addpd512fold(<8 x double> %y) {
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
                                  double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}
; Lane-wise fadd of two <16 x float> arguments (register-register form).
; CHECK-LABEL: addps512
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}
; fadd of %y with a non-splat <16 x float> constant. The directive below
; expects vaddps to take the constant from a RIP-relative constant-pool label.
; CHECK-LABEL: addps512fold
; CHECK: vaddps LCP{{.*}}(%rip)
define <16 x float> @addps512fold(<16 x float> %y) {
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}
; Lane-wise fsub (%x - %y) of two <8 x double> arguments.
; CHECK-LABEL: subpd512
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}
; fsub whose subtrahend is loaded from the pointer argument %x (align 8), so
; the second operand comes from memory rather than a register argument.
; CHECK-LABEL: @subpd512fold
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
  %tmp2 = load <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}
; Lane-wise fsub (%x - %y) of two <16 x float> arguments.
; CHECK-LABEL: @subps512
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}
; fsub whose subtrahend is loaded from the pointer argument %x (align 4), so
; the second operand comes from memory rather than a register argument.
; CHECK-LABEL: subps512fold
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
  %tmp2 = load <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}
; Lane-wise integer multiply of two <8 x i64> arguments.
; CHECK-LABEL: imulq512
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
  %z = mul <8 x i64> %x, %y
  ret <8 x i64> %z
}
; Lane-wise fmul of two <8 x double> arguments (register-register form).
; CHECK-LABEL: mulpd512
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}
; fmul of %y with a non-splat constant vector. The directive below expects
; vmulpd to take the constant from a RIP-relative constant-pool label.
; CHECK-LABEL: mulpd512fold
; CHECK: vmulpd LCP{{.*}}(%rip)
define <8 x double> @mulpd512fold(<8 x double> %y) {
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
                                  double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}
; Lane-wise fmul of two <16 x float> arguments (register-register form).
; CHECK-LABEL: mulps512
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}
; fmul of %y with a non-splat <16 x float> constant. The directive below
; expects vmulps to take the constant from a RIP-relative constant-pool label.
; CHECK-LABEL: mulps512fold
; CHECK: vmulps LCP{{.*}}(%rip)
define <16 x float> @mulps512fold(<16 x float> %y) {
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}
; Lane-wise fdiv (%x / %y) of two <8 x double> arguments.
; CHECK-LABEL: divpd512
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}
; fdiv of %y by a non-splat constant vector. The directive below expects
; vdivpd to take the divisor from a RIP-relative constant-pool label.
; CHECK-LABEL: divpd512fold
; CHECK: vdivpd LCP{{.*}}(%rip)
define <8 x double> @divpd512fold(<8 x double> %y) {
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
                                  double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}
; Lane-wise fdiv (%x / %y) of two <16 x float> arguments.
; CHECK-LABEL: divps512
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}
; fdiv of %y by a non-splat <16 x float> constant. The directive below expects
; vdivps to take the divisor from a RIP-relative constant-pool label.
; CHECK-LABEL: divps512fold
; CHECK: vdivps LCP{{.*}}(%rip)
define <16 x float> @divps512fold(<16 x float> %y) {
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
                                  float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}
; Lane-wise integer add of two <8 x i64> arguments.
; CHECK-LABEL: vpaddq_test
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}
; Integer add whose second operand is a full <8 x i64> vector loaded from the
; pointer argument %j (align 4).
; CHECK-LABEL: vpaddq_fold_test
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
  %tmp = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}
; Integer add of %i with a splat constant (all lanes = 1). The directive below
; expects vpaddq with an embedded {1to8} broadcast from a constant-pool label.
; CHECK-LABEL: vpaddq_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}
; Integer add of %i with a vector built by inserting the same scalar into all
; eight lanes. The directive below expects the scalar to be read straight from
; the pointer argument ((%rdi)) with an embedded {1to8} broadcast.
; CHECK-LABEL: vpaddq_broadcast2_test
; CHECK: vpaddq (%rdi){1to8}
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
  ; Scalar that the insertelement chain below replicates into every lane.
  %tmp = load i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}
; Lane-wise integer add of two <16 x i32> arguments.
; CHECK-LABEL: vpaddd_test
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Integer add whose second operand is a full <16 x i32> vector loaded from the
; pointer argument %j (align 4).
; CHECK-LABEL: vpaddd_fold_test
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
  %tmp = load <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}
; Integer add of %i with a splat constant (all lanes = 1). The directive below
; expects vpaddd with an embedded {1to16} broadcast from a constant-pool label.
; CHECK-LABEL: vpaddd_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                           i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}
; Merge-masked add: lanes where %mask1 != 0 take %i + %j, the others keep %i.
; The directive below expects a three-register vpaddd carrying a {%k} write-mask.
; CHECK-LABEL: vpaddd_mask_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; Zero-masked add: lanes where %mask1 != 0 take %i + %j, the others become 0.
; The directive below expects vpaddd with both a {%k} write-mask and {z}.
; CHECK-LABEL: vpaddd_maskz_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Merge-masked add whose second operand is loaded from %j.ptr: lanes where
; %mask1 != 0 take %i + load, the others keep %i. The directive below expects
; the load to appear as the (%rdi) memory operand of a write-masked vpaddd.
; CHECK-LABEL: vpaddd_mask_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; Merge-masked add of %i with a splat constant (all lanes = 1): lanes where
; %mask1 != 0 take the sum, the others keep %i. The directive below expects a
; {1to16} constant-pool broadcast operand on a write-masked vpaddd.
; CHECK-LABEL: vpaddd_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                           i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; Zero-masked add whose second operand is loaded from %j.ptr: lanes where
; %mask1 != 0 take %i + load, the others become 0. The directive below expects
; a (%rdi) memory operand on a vpaddd with {%k} and {z}.
; CHECK-LABEL: vpaddd_maskz_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Zero-masked add of %i with a splat constant (all lanes = 1): lanes where
; %mask1 != 0 take the sum, the others become 0. The directive below expects a
; {1to16} constant-pool broadcast operand on a vpaddd with {%k} and {z}.
; CHECK-LABEL: vpaddd_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                           i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Lane-wise integer subtract (%i - %j) of two <8 x i64> arguments.
; CHECK-LABEL: vpsubq_test
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}
; Lane-wise integer subtract (%i - %j) of two <16 x i32> arguments.
; CHECK-LABEL: vpsubd_test
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Lane-wise integer multiply of two <16 x i32> arguments. The directive below
; expects a vpmulld whose first printed operand is a zmm register.
; CHECK-LABEL: vpmulld_test
; CHECK: vpmulld %zmm
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Scalar float square root via a readnone tail call to the external sqrtf.
; The directive below expects vsqrtss with an encoding starting at byte 0x62.
; CHECK: vsqrtss {{.*}} encoding: [0x62
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}
; Scalar double square root via a readnone tail call to the external sqrt.
; The directive below expects vsqrtsd with an encoding starting at byte 0x62.
; CHECK: vsqrtsd {{.*}}## encoding: [0x62
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}
; Scalar float square root via the llvm.sqrt.f32 intrinsic.
; The directive below expects vsqrtss with an encoding starting at byte 0x62.
; CHECK: vsqrtss {{.*}}## encoding: [0x62
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}
; <16 x float> square root via the llvm.sqrt.v16f32 intrinsic; the directive
; below expects a vsqrtps instruction.
; CHECK: vsqrtps {{.*}}
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}
; <8 x double> square root via the llvm.sqrt.v8f64 intrinsic; the directive
; below expects a vsqrtpd instruction.
; CHECK: vsqrtpd {{.*}}
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}
; fadd of %a with a splat float constant (same value in all 16 lanes). The
; directive below expects a {1to16} constant-pool broadcast operand with zmm0
; as both source and destination.
; CHECK-LABEL: fadd_broadcast
; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                              float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                              float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                              float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}
; Integer add of %a with a splat constant (all lanes = 2). The directive below
; expects vpaddq with a {1to8} constant-pool broadcast, operating on zmm0.
; CHECK-LABEL: addq_broadcast
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
; Bitwise or of %a with a splat constant (all lanes = 2). The directive below
; expects vporq with a {1to8} constant-pool broadcast, operating on zmm0.
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
; Bitwise and whose second operand is a full <16 x i32> vector loaded from the
; pointer argument %x (align 4).
; CHECK-LABEL: andd512fold
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
  %a = load <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}
392 ; CHECK-LABEL: andqbrst
393 ; CHECK: vpandq (%rdi){1to8}, %zmm
395 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
397 %a = load i64* %ap, align 8
398 %b = insertelement <8 x i64> undef, i64 %a, i32 0
399 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
400 %d = and <8 x i64> %p1, %c