1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 ; CHECK-LABEL: addpd512
; addpd512: lane-wise double-precision add of the two <8 x double> arguments.
; Computes %x + %y (second parameter plus first) and returns the sum.
6 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
8 %add.i = fadd <8 x double> %x, %y
9 ret <8 x double> %add.i
12 ; CHECK-LABEL: addpd512fold
13 ; CHECK: vaddpd LCP{{.*}}(%rip)
; addpd512fold: adds a literal <8 x double> constant to %y and returns the sum.
; The FileCheck pattern preceding this function looks for vaddpd taking an
; LCP...(%rip) operand, i.e. the constant is expected to be read straight
; from memory by the add.
; Note: lane 5 of the constant is 3.8 while lane 1 is 3.4, so the upper half
; is not an exact copy of the lower half.
15 define <8 x double> @addpd512fold(<8 x double> %y) {
17 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
18 ret <8 x double> %add.i
21 ; CHECK-LABEL: addps512
; addps512: lane-wise single-precision add of the two <16 x float> arguments.
; Computes %x + %y and returns the sum.
24 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
26 %add.i = fadd <16 x float> %x, %y
27 ret <16 x float> %add.i
30 ; CHECK-LABEL: addps512fold
31 ; CHECK: vaddps LCP{{.*}}(%rip)
; addps512fold: adds a literal <16 x float> constant to %y and returns the sum.
; The FileCheck pattern preceding this function looks for vaddps taking an
; LCP...(%rip) operand.
; Note: lane 11 of the constant is 4.5 rather than 0x3FF3333340000000, so the
; vector is not a strict repetition of its first four lanes.
33 define <16 x float> @addps512fold(<16 x float> %y) {
35 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
36 ret <16 x float> %add.i
39 ; CHECK-LABEL: subpd512
; subpd512: lane-wise double-precision subtract of the two <8 x double>
; arguments. Operand order matters: the result is %x - %y, i.e. the second
; parameter minus the first.
42 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
44 %sub.i = fsub <8 x double> %x, %y
45 ret <8 x double> %sub.i
48 ; CHECK-LABEL: @subpd512fold
; subpd512fold: loads an <8 x double> vector through pointer %x (declared
; alignment 8) and subtracts it from %y; returns %y - loaded value.
51 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; The load carries only 8-byte alignment, less than the 64-byte vector size.
53 %tmp2 = load <8 x double>* %x, align 8
54 %sub.i = fsub <8 x double> %y, %tmp2
55 ret <8 x double> %sub.i
58 ; CHECK-LABEL: @subps512
; subps512: lane-wise single-precision subtract of the two <16 x float>
; arguments. The result is %x - %y (second parameter minus first).
61 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
63 %sub.i = fsub <16 x float> %x, %y
64 ret <16 x float> %sub.i
67 ; CHECK-LABEL: subps512fold
; subps512fold: loads a <16 x float> vector through pointer %x (declared
; alignment 4) and subtracts it from %y; returns %y - loaded value.
70 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; The load carries only 4-byte alignment, less than the 64-byte vector size.
72 %tmp2 = load <16 x float>* %x, align 4
73 %sub.i = fsub <16 x float> %y, %tmp2
74 ret <16 x float> %sub.i
77 ; CHECK-LABEL: imulq512
; imulq512: lane-wise 64-bit integer multiply of the two <8 x i64> arguments
; (%x * %y), producing %z.
81 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
82 %z = mul <8 x i64>%x, %y
86 ; CHECK-LABEL: mulpd512
; mulpd512: lane-wise double-precision multiply of the two <8 x double>
; arguments (%x * %y); the product is returned.
89 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
91 %mul.i = fmul <8 x double> %x, %y
92 ret <8 x double> %mul.i
95 ; CHECK-LABEL: mulpd512fold
96 ; CHECK: vmulpd LCP{{.*}}(%rip)
; mulpd512fold: multiplies %y by a literal <8 x double> constant whose lanes
; are 4.5, 3.4, 2.3, 1.2 repeated twice, and returns the product.
; The FileCheck pattern preceding this function looks for vmulpd taking an
; LCP...(%rip) operand.
98 define <8 x double> @mulpd512fold(<8 x double> %y) {
100 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
101 ret <8 x double> %mul.i
104 ; CHECK-LABEL: mulps512
; mulps512: lane-wise single-precision multiply of the two <16 x float>
; arguments (%x * %y); the product is returned.
107 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
109 %mul.i = fmul <16 x float> %x, %y
110 ret <16 x float> %mul.i
113 ; CHECK-LABEL: mulps512fold
114 ; CHECK: vmulps LCP{{.*}}(%rip)
; mulps512fold: multiplies %y by a literal <16 x float> constant made of the
; same four lane values repeated four times, and returns the product.
; The FileCheck pattern preceding this function looks for vmulps taking an
; LCP...(%rip) operand.
116 define <16 x float> @mulps512fold(<16 x float> %y) {
118 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
119 ret <16 x float> %mul.i
122 ; CHECK-LABEL: divpd512
; divpd512: lane-wise double-precision divide of the two <8 x double>
; arguments. Operand order matters: the result is %x / %y, i.e. the second
; parameter divided by the first.
125 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
127 %div.i = fdiv <8 x double> %x, %y
128 ret <8 x double> %div.i
131 ; CHECK-LABEL: divpd512fold
132 ; CHECK: vdivpd LCP{{.*}}(%rip)
; divpd512fold: divides %y by a literal <8 x double> constant whose lanes are
; 4.5, 3.4, 2.3, 1.2 repeated twice, and returns the quotient.
; The FileCheck pattern preceding this function looks for vdivpd taking an
; LCP...(%rip) operand.
134 define <8 x double> @divpd512fold(<8 x double> %y) {
136 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
137 ret <8 x double> %div.i
140 ; CHECK-LABEL: divps512
; divps512: lane-wise single-precision divide of the two <16 x float>
; arguments. The result is %x / %y (second parameter divided by first).
143 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
145 %div.i = fdiv <16 x float> %x, %y
146 ret <16 x float> %div.i
149 ; CHECK-LABEL: divps512fold
150 ; CHECK: vdivps LCP{{.*}}(%rip)
; divps512fold: divides %y by a literal <16 x float> constant and returns the
; quotient. The FileCheck pattern preceding this function looks for vdivps
; taking an LCP...(%rip) operand.
; Note: lanes 5 and 13 of the constant are 4.5 rather than
; 0x400B333340000000, so the vector is not a strict repetition of its first
; four lanes (it does repeat with a period of eight lanes).
152 define <16 x float> @divps512fold(<16 x float> %y) {
154 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
155 ret <16 x float> %div.i
158 ; CHECK-LABEL: vpaddq_test
; vpaddq_test: lane-wise 64-bit integer add of the two <8 x i64> arguments
; (%i + %j), producing %x. Declared nounwind readnone.
161 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
162 %x = add <8 x i64> %i, %j
166 ; CHECK-LABEL: vpaddd_test
; vpaddd_test: lane-wise 32-bit integer add of the two <16 x i32> arguments
; (%i + %j), producing %x. Declared nounwind readnone.
169 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
170 %x = add <16 x i32> %i, %j
174 ; CHECK-LABEL: vpsubq_test
; vpsubq_test: lane-wise 64-bit integer subtract of the two <8 x i64>
; arguments (%i - %j), producing %x. Declared nounwind readnone.
177 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
178 %x = sub <8 x i64> %i, %j
182 ; CHECK-LABEL: vpsubd_test
; vpsubd_test: lane-wise 32-bit integer subtract of the two <16 x i32>
; arguments (%i - %j), producing %x. Declared nounwind readnone.
185 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
186 %x = sub <16 x i32> %i, %j
190 ; CHECK-LABEL: vpmulld_test
191 ; CHECK: vpmulld %zmm
; vpmulld_test: lane-wise 32-bit integer multiply of the two <16 x i32>
; arguments (%i * %j), producing %x. The FileCheck pattern preceding this
; function looks for a vpmulld instruction with a %zmm operand.
193 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
194 %x = mul <16 x i32> %i, %j
199 ; CHECK: vsqrtss {{.*}} encoding: [0x62
; sqrtA: single-precision square root expressed as a tail call to the
; external function @sqrtf, which is declared readnone. The FileCheck pattern
; preceding this declaration looks for vsqrtss with an encoding that starts
; with byte 0x62 (the EVEX prefix byte).
201 declare float @sqrtf(float) readnone
202 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
204 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
209 ; CHECK: vsqrtsd {{.*}}## encoding: [0x62
; sqrtB: double-precision square root expressed as a tail call to the
; external function @sqrt, which is declared readnone. The FileCheck pattern
; preceding this declaration looks for vsqrtsd with an encoding that starts
; with byte 0x62 (the EVEX prefix byte).
211 declare double @sqrt(double) readnone
212 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
214 %call = tail call double @sqrt(double %a) nounwind readnone
219 ; CHECK: vsqrtss {{.*}}## encoding: [0x62
; sqrtC: single-precision square root via the @llvm.sqrt.f32 intrinsic (a
; plain call, unlike the tail calls to the C library routines in sqrtA and
; sqrtB). The FileCheck pattern preceding this declaration looks for vsqrtss
; with an encoding that starts with byte 0x62 (the EVEX prefix byte).
221 declare float @llvm.sqrt.f32(float)
222 define float @sqrtC(float %a) nounwind {
223 %b = call float @llvm.sqrt.f32(float %a)
227 ; CHECK-LABEL: fadd_broadcast
228 ; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
; fadd_broadcast: adds a splat constant to %a; all 16 lanes of the constant
; hold the same value, 0x3FB99999A0000000 (roughly 0.1 as a float). The
; FileCheck pattern preceding this function looks for an LCP...(%rip) operand
; with a {1to16} broadcast, with %zmm0 as both source and destination.
230 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
231 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
235 ; CHECK-LABEL: addq_broadcast
236 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; addq_broadcast: adds the splat constant 2 (same value in all 8 lanes) to
; %a. The FileCheck pattern preceding this function looks for vpaddq with an
; LCP...(%rip){1to8} broadcast operand on %zmm0.
238 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
239 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
243 ; CHECK-LABEL: orq_broadcast
244 ; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; orq_broadcast: bitwise OR of %a with the splat constant 2 (same value in
; all 8 lanes). The FileCheck pattern preceding this function looks for vporq
; with an LCP...(%rip){1to8} broadcast operand on %zmm0.
246 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
247 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
251 ; CHECK-LABEL: andd512fold
; andd512fold: loads a <16 x i32> vector through pointer %x (declared
; alignment 4) and computes the bitwise AND of %y with it, producing %b.
254 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
256 %a = load <16 x i32>* %x, align 4
257 %b = and <16 x i32> %y, %a
261 ; CHECK-LABEL: andqbrst
262 ; CHECK: vpandq (%rdi){1to8}, %zmm
; andqbrst: bitwise AND of %p1 with a scalar i64 loaded from %ap and
; replicated into all 8 lanes. The FileCheck pattern preceding this function
; looks for vpandq with a (%rdi){1to8} broadcast memory operand.
264 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
266 %a = load i64* %ap, align 8
; Splat idiom: put the scalar in lane 0, then shuffle with an all-zero mask
; so every result lane selects lane 0.
267 %b = insertelement <8 x i64> undef, i64 %a, i32 0
268 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
269 %d = and <8 x i64> %p1, %c