1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
3 ; CHECK-LABEL: addpd512
; Lane-wise double-precision add of the two 512-bit vector arguments (%x + %y);
; the sum is returned.
6 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
8 %add.i = fadd <8 x double> %x, %y
9 ret <8 x double> %add.i
12 ; CHECK-LABEL: addpd512fold
13 ; CHECK: vaddpd LCP{{.*}}(%rip)
; Adds a literal <8 x double> vector (lanes are not all equal) to %y.
; The match directive preceding this function expects vaddpd to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
15 define <8 x double> @addpd512fold(<8 x double> %y) {
17 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
18 ret <8 x double> %add.i
21 ; CHECK-LABEL: addps512
; Lane-wise single-precision add of the two 512-bit vector arguments (%x + %y);
; the sum is returned.
24 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
26 %add.i = fadd <16 x float> %x, %y
27 ret <16 x float> %add.i
30 ; CHECK-LABEL: addps512fold
31 ; CHECK: vaddps LCP{{.*}}(%rip)
; Adds a literal <16 x float> vector (lanes are not all equal) to %y.
; The match directive preceding this function expects vaddps to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
33 define <16 x float> @addps512fold(<16 x float> %y) {
35 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
36 ret <16 x float> %add.i
39 ; CHECK-LABEL: subpd512
; Lane-wise double-precision subtract of the two vector arguments: returns
; %x - %y (note the operand order differs from the parameter order).
42 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
44 %sub.i = fsub <8 x double> %x, %y
45 ret <8 x double> %sub.i
48 ; CHECK-LABEL: @subpd512fold
; Loads an <8 x double> vector through %x (declared with 8-byte alignment only)
; and returns %y minus the loaded vector.
; NOTE(review): the name suggests the load is meant to fold into the subtract
; instruction, but no such expectation is visible next to this function —
; confirm.
51 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
53 %tmp2 = load <8 x double>* %x, align 8
54 %sub.i = fsub <8 x double> %y, %tmp2
55 ret <8 x double> %sub.i
58 ; CHECK-LABEL: @subps512
; Lane-wise single-precision subtract of the two vector arguments: returns
; %x - %y (note the operand order differs from the parameter order).
61 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
63 %sub.i = fsub <16 x float> %x, %y
64 ret <16 x float> %sub.i
67 ; CHECK-LABEL: subps512fold
; Loads a <16 x float> vector through %x (declared with 4-byte alignment only)
; and returns %y minus the loaded vector.
; NOTE(review): the name suggests the load is meant to fold into the subtract
; instruction, but no such expectation is visible next to this function —
; confirm.
70 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
72 %tmp2 = load <16 x float>* %x, align 4
73 %sub.i = fsub <16 x float> %y, %tmp2
74 ret <16 x float> %sub.i
77 ; CHECK-LABEL: mulpd512
; Lane-wise double-precision multiply of the two 512-bit vector arguments
; (%x * %y); the product is returned.
80 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
82 %mul.i = fmul <8 x double> %x, %y
83 ret <8 x double> %mul.i
86 ; CHECK-LABEL: mulpd512fold
87 ; CHECK: vmulpd LCP{{.*}}(%rip)
; Multiplies %y by a literal <8 x double> vector (lanes are not all equal).
; The match directive preceding this function expects vmulpd to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
89 define <8 x double> @mulpd512fold(<8 x double> %y) {
91 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
92 ret <8 x double> %mul.i
95 ; CHECK-LABEL: mulps512
; Lane-wise single-precision multiply of the two 512-bit vector arguments
; (%x * %y); the product is returned.
98 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
100 %mul.i = fmul <16 x float> %x, %y
101 ret <16 x float> %mul.i
104 ; CHECK-LABEL: mulps512fold
105 ; CHECK: vmulps LCP{{.*}}(%rip)
; Multiplies %y by a literal <16 x float> vector (lanes are not all equal).
; The match directive preceding this function expects vmulps to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
107 define <16 x float> @mulps512fold(<16 x float> %y) {
109 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
110 ret <16 x float> %mul.i
113 ; CHECK-LABEL: divpd512
; Lane-wise double-precision divide of the two vector arguments: returns
; %x / %y (note the operand order differs from the parameter order).
116 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
118 %div.i = fdiv <8 x double> %x, %y
119 ret <8 x double> %div.i
122 ; CHECK-LABEL: divpd512fold
123 ; CHECK: vdivpd LCP{{.*}}(%rip)
; Divides %y by a literal <8 x double> vector (lanes are not all equal).
; The match directive preceding this function expects vdivpd to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
125 define <8 x double> @divpd512fold(<8 x double> %y) {
127 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
128 ret <8 x double> %div.i
131 ; CHECK-LABEL: divps512
; Lane-wise single-precision divide of the two vector arguments: returns
; %x / %y (note the operand order differs from the parameter order).
134 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
136 %div.i = fdiv <16 x float> %x, %y
137 ret <16 x float> %div.i
140 ; CHECK-LABEL: divps512fold
141 ; CHECK: vdivps LCP{{.*}}(%rip)
; Divides %y by a literal <16 x float> vector (lanes are not all equal).
; The match directive preceding this function expects vdivps to take the
; constant as a RIP-relative constant-pool (LCP...) memory operand.
143 define <16 x float> @divps512fold(<16 x float> %y) {
145 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
146 ret <16 x float> %div.i
149 ; CHECK-LABEL: vpaddq_test
; Lane-wise 64-bit integer add of the two <8 x i64> arguments (%i + %j).
; NOTE(review): no ret or closing brace follows the add here — confirm the
; function is terminated.
152 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
153 %x = add <8 x i64> %i, %j
157 ; CHECK-LABEL: vpaddd_test
; Lane-wise 32-bit integer add of the two <16 x i32> arguments (%i + %j).
; NOTE(review): no ret or closing brace follows the add here — confirm the
; function is terminated.
160 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
161 %x = add <16 x i32> %i, %j
165 ; CHECK-LABEL: vpsubq_test
; Lane-wise 64-bit integer subtract of the two <8 x i64> arguments (%i - %j).
; NOTE(review): no ret or closing brace follows the sub here — confirm the
; function is terminated.
168 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
169 %x = sub <8 x i64> %i, %j
173 ; CHECK-LABEL: vpsubd_test
; Lane-wise 32-bit integer subtract of the two <16 x i32> arguments (%i - %j).
; NOTE(review): no ret or closing brace follows the sub here — confirm the
; function is terminated.
176 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
177 %x = sub <16 x i32> %i, %j
181 ; CHECK-LABEL: vpmulld_test
182 ; CHECK: vpmulld %zmm
; Lane-wise 32-bit integer multiply of the two <16 x i32> arguments (%i * %j).
; The match directive preceding this function expects a vpmulld that uses a
; zmm register.
; NOTE(review): no ret or closing brace follows the mul here — confirm the
; function is terminated.
184 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
185 %x = mul <16 x i32> %i, %j
; Scalar float square root expressed as a tail call to the external function
; @sqrtf, which is declared (and called) readnone.
; NOTE(review): neither the expected instruction nor the ret of %conv1 is
; visible here — confirm both exist.
192 declare float @sqrtf(float) readnone
193 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
195 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
; Scalar double square root expressed as a tail call to the external function
; @sqrt, which is declared (and called) readnone.
; NOTE(review): neither the expected instruction nor the ret of %call is
; visible here — confirm both exist.
202 declare double @sqrt(double) readnone
203 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
205 %call = tail call double @sqrt(double %a) nounwind readnone
; Scalar float square root expressed through the @llvm.sqrt.f32 intrinsic
; rather than a library call.
; NOTE(review): neither the expected instruction nor the ret of %b is visible
; here — confirm both exist.
212 declare float @llvm.sqrt.f32(float)
213 define float @sqrtC(float %a) nounwind {
214 %b = call float @llvm.sqrt.f32(float %a)
; Adds the same float constant (0x3FB99999A0000000) to all 16 lanes of %a.
; The splat operand is expected to be consumed as a constant-pool memory
; operand with embedded broadcast ({1to16}). The match directive names the
; vaddps mnemonic so it cannot be satisfied by some other broadcasting
; instruction, matching how the addq_broadcast and orq_broadcast tests anchor
; on vpaddq and vporq.
218 ; CHECK-LABEL: fadd_broadcast
219 ; CHECK: vaddps LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
221 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
222 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
226 ; CHECK-LABEL: addq_broadcast
227 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; Adds the splat constant 2 to every i64 lane of %a.
; The match directive preceding this function expects vpaddq to read the
; constant from the constant pool with embedded broadcast ({1to8}), with zmm0
; as both source and destination.
; NOTE(review): no ret or closing brace follows the add here — confirm the
; function is terminated.
229 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
230 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
234 ; CHECK-LABEL: orq_broadcast
235 ; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; ORs the splat constant 2 into every i64 lane of %a.
; The match directive preceding this function expects vporq to read the
; constant from the constant pool with embedded broadcast ({1to8}), with zmm0
; as both source and destination.
; NOTE(review): no ret or closing brace follows the or here — confirm the
; function is terminated.
237 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
238 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
242 ; CHECK-LABEL: andd512fold
; Loads a <16 x i32> vector through %x (declared with 4-byte alignment only)
; and ANDs it lane-wise with %y.
; NOTE(review): the name suggests the load is meant to fold into the AND
; instruction, but neither that expectation nor the ret of %b is visible
; here — confirm.
245 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
247 %a = load <16 x i32>* %x, align 4
248 %b = and <16 x i32> %y, %a
252 ; CHECK-LABEL: andqbrst
253 ; CHECK: vpandq (%rdi){1to8}, %zmm
; Loads one i64 through %ap, places it in lane 0 of an otherwise undef vector,
; then replicates lane 0 into all eight lanes (the all-zero shuffle mask selects
; element 0 for every result lane) and ANDs that splat with %p1.
; The match directive preceding this function expects the whole sequence to
; become a single vpandq with an embedded-broadcast memory operand,
; (%rdi){1to8}.
; NOTE(review): the ret of %d and the closing brace are not visible here —
; confirm the function is terminated.
255 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
257 %a = load i64* %ap, align 8
258 %b = insertelement <8 x i64> undef, i64 %a, i32 0
259 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
260 %d = and <8 x i64> %p1, %c