1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; addpd512: element-wise floating-point add of two <8 x double> arguments.
; Operands are used in the order %x, %y (second parameter first).
3 ; CHECK-LABEL: addpd512
6 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
8 %add.i = fadd <8 x double> %x, %y
9 ret <8 x double> %add.i
; addpd512fold: adds a constant <8 x double> vector to the argument %y.
; The constant's elements are not all equal, and the pattern below expects
; vaddpd to take its operand from an LCP* symbol addressed relative to %rip.
12 ; CHECK-LABEL: addpd512fold
13 ; CHECK: vaddpd LCP{{.*}}(%rip)
15 define <8 x double> @addpd512fold(<8 x double> %y) {
17 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
18 ret <8 x double> %add.i
; addps512: element-wise floating-point add of two <16 x float> arguments.
; Operands are used in the order %x, %y (second parameter first).
21 ; CHECK-LABEL: addps512
24 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
26 %add.i = fadd <16 x float> %x, %y
27 ret <16 x float> %add.i
; addps512fold: adds a constant <16 x float> vector to the argument %y.
; The constant's elements are not all equal, and the pattern below expects
; vaddps to take its operand from an LCP* symbol addressed relative to %rip.
30 ; CHECK-LABEL: addps512fold
31 ; CHECK: vaddps LCP{{.*}}(%rip)
33 define <16 x float> @addps512fold(<16 x float> %y) {
35 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
36 ret <16 x float> %add.i
; subpd512: element-wise floating-point subtract on <8 x double>.
; Computes %x - %y, i.e. the second parameter minus the first.
39 ; CHECK-LABEL: subpd512
42 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
44 %sub.i = fsub <8 x double> %x, %y
45 ret <8 x double> %sub.i
; subpd512fold: subtracts a vector loaded from memory from the argument %y.
; The <8 x double> subtrahend is loaded through the pointer %x with an
; 8-byte alignment, which is less than the 64-byte size of the vector.
48 ; CHECK-LABEL: @subpd512fold
51 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
53 %tmp2 = load <8 x double>* %x, align 8
54 %sub.i = fsub <8 x double> %y, %tmp2
55 ret <8 x double> %sub.i
; subps512: element-wise floating-point subtract on <16 x float>.
; Computes %x - %y, i.e. the second parameter minus the first.
58 ; CHECK-LABEL: @subps512
61 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
63 %sub.i = fsub <16 x float> %x, %y
64 ret <16 x float> %sub.i
; subps512fold: subtracts a vector loaded from memory from the argument %y.
; The <16 x float> subtrahend is loaded through the pointer %x with a
; 4-byte alignment, which is less than the 64-byte size of the vector.
67 ; CHECK-LABEL: subps512fold
70 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
72 %tmp2 = load <16 x float>* %x, align 4
73 %sub.i = fsub <16 x float> %y, %tmp2
74 ret <16 x float> %sub.i
; mulpd512: element-wise floating-point multiply of two <8 x double> arguments.
; Operands are used in the order %x, %y (second parameter first).
77 ; CHECK-LABEL: mulpd512
80 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
82 %mul.i = fmul <8 x double> %x, %y
83 ret <8 x double> %mul.i
; mulpd512fold: multiplies the argument %y by a constant <8 x double> vector.
; The constant's elements are not all equal, and the pattern below expects
; vmulpd to take its operand from an LCP* symbol addressed relative to %rip.
86 ; CHECK-LABEL: mulpd512fold
87 ; CHECK: vmulpd LCP{{.*}}(%rip)
89 define <8 x double> @mulpd512fold(<8 x double> %y) {
91 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
92 ret <8 x double> %mul.i
; mulps512: element-wise floating-point multiply of two <16 x float> arguments.
; Operands are used in the order %x, %y (second parameter first).
95 ; CHECK-LABEL: mulps512
98 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
100 %mul.i = fmul <16 x float> %x, %y
101 ret <16 x float> %mul.i
; mulps512fold: multiplies the argument %y by a constant <16 x float> vector.
; The constant's elements are not all equal, and the pattern below expects
; vmulps to take its operand from an LCP* symbol addressed relative to %rip.
104 ; CHECK-LABEL: mulps512fold
105 ; CHECK: vmulps LCP{{.*}}(%rip)
107 define <16 x float> @mulps512fold(<16 x float> %y) {
109 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
110 ret <16 x float> %mul.i
; divpd512: element-wise floating-point divide on <8 x double>.
; Computes %x / %y, i.e. the second parameter divided by the first.
113 ; CHECK-LABEL: divpd512
116 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
118 %div.i = fdiv <8 x double> %x, %y
119 ret <8 x double> %div.i
; divpd512fold: divides the argument %y by a constant <8 x double> vector.
; The constant's elements are not all equal, and the pattern below expects
; vdivpd to take its operand from an LCP* symbol addressed relative to %rip.
122 ; CHECK-LABEL: divpd512fold
123 ; CHECK: vdivpd LCP{{.*}}(%rip)
125 define <8 x double> @divpd512fold(<8 x double> %y) {
127 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
128 ret <8 x double> %div.i
; divps512: element-wise floating-point divide on <16 x float>.
; Computes %x / %y, i.e. the second parameter divided by the first.
131 ; CHECK-LABEL: divps512
134 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
136 %div.i = fdiv <16 x float> %x, %y
137 ret <16 x float> %div.i
; divps512fold: divides the argument %y by a constant <16 x float> vector.
; The constant's elements are not all equal, and the pattern below expects
; vdivps to take its operand from an LCP* symbol addressed relative to %rip.
140 ; CHECK-LABEL: divps512fold
141 ; CHECK: vdivps LCP{{.*}}(%rip)
143 define <16 x float> @divps512fold(<16 x float> %y) {
145 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
146 ret <16 x float> %div.i
; vpaddq_test: element-wise integer add of two <8 x i64> arguments (%i + %j).
; The function is declared nounwind readnone.
149 ; CHECK-LABEL: vpaddq_test
152 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
153 %x = add <8 x i64> %i, %j
; vpaddd_test: element-wise integer add of two <16 x i32> arguments (%i + %j).
; The function is declared nounwind readnone.
157 ; CHECK-LABEL: vpaddd_test
160 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
161 %x = add <16 x i32> %i, %j
; vpsubq_test: element-wise integer subtract on <8 x i64> (%i - %j).
; The function is declared nounwind readnone.
165 ; CHECK-LABEL: vpsubq_test
168 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
169 %x = sub <8 x i64> %i, %j
; vpsubd_test: element-wise integer subtract on <16 x i32> (%i - %j).
; The function is declared nounwind readnone.
173 ; CHECK-LABEL: vpsubd_test
176 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
177 %x = sub <16 x i32> %i, %j
; vpmulld_test: element-wise integer multiply of two <16 x i32> arguments.
; The pattern below expects a vpmulld whose first printed operand is a
; zmm register.
181 ; CHECK-LABEL: vpmulld_test
182 ; CHECK: vpmulld %zmm
184 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
185 %x = mul <16 x i32> %i, %j
; addq_broadcast: adds a constant vector with every lane equal to i64 2
; to the argument %a. The pattern below expects vpaddq with a {1to8}
; broadcast memory operand (an LCP* symbol relative to %rip), with %zmm0
; as both the other source and the destination.
189 ; CHECK-LABEL: addq_broadcast
190 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
192 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
193 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; orq_broadcast: bitwise-ors the argument %a with a constant vector whose
; lanes all equal i64 2. The pattern below expects vporq with a {1to8}
; broadcast memory operand (an LCP* symbol relative to %rip), with %zmm0
; as both the other source and the destination.
197 ; CHECK-LABEL: orq_broadcast
198 ; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
200 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
201 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; andd512fold: bitwise-ands the argument %y with a <16 x i32> vector loaded
; through the pointer %x. The load is given a 4-byte alignment, which is
; less than the 64-byte size of the vector.
205 ; CHECK-LABEL: andd512fold
208 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
210 %a = load <16 x i32>* %x, align 4
211 %b = and <16 x i32> %y, %a
; andqbrst: bitwise-ands the argument %p1 with a scalar i64 from memory that
; has been replicated across all eight lanes. The pattern below expects
; vpandq with a {1to8} broadcast memory operand addressed through %rdi.
215 ; CHECK-LABEL: andqbrst
216 ; CHECK: vpandq (%rdi){1to8}, %zmm
218 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
220 %a = load i64* %ap, align 8
; Splat: put the scalar in lane 0, then shuffle with an all-zero mask so
; that every result lane reads lane 0.
221 %b = insertelement <8 x i64> undef, i64 %a, i32 0
222 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
223 %d = and <8 x i64> %p1, %c