; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
; 256-bit (<4 x i64>) bitwise AND of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpand).
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit (<2 x i64>) bitwise AND of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpand).
define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 256-bit (<4 x i64>) and-not pattern: %a AND NOT(%a + 1).
; The xor with all-ones is the bitwise NOT. %b is not used by the body.
; Presumably named after the instruction it is meant to select (vpandn).
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}
; 128-bit (<2 x i64>) and-not pattern: %a AND NOT(%a + 1).
; The xor with all-ones is the bitwise NOT. %b is not used by the body.
; Presumably named after the instruction it is meant to select (vpandn).
define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}
; 256-bit (<4 x i64>) bitwise OR of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpor).
define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 256-bit (<4 x i64>) bitwise XOR of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpxor).
define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit (<2 x i64>) bitwise OR of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpor).
define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 128-bit (<2 x i64>) bitwise XOR of (%a + 1) with %b.
; Presumably named after the instruction it is meant to select (vpxor).
define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 256-bit lane-wise integer add of four i64 elements (%i + %j).
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
; 256-bit lane-wise integer add of eight i32 elements (%i + %j).
define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
; 256-bit lane-wise integer add of sixteen i16 elements (%i + %j).
define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit lane-wise integer add of thirty-two i8 elements (%i + %j).
define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
; 256-bit lane-wise integer subtract of four i64 elements (%i - %j).
define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
; 256-bit lane-wise integer subtract of eight i32 elements (%i - %j).
define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
; 256-bit lane-wise integer subtract of sixteen i16 elements (%i - %j).
define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit lane-wise integer subtract of thirty-two i8 elements (%i - %j).
define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
; 256-bit lane-wise integer multiply of sixteen i16 elements (%i * %j).
; `mul` keeps the low 16 bits of each product.
define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit signed compare of eight i32 elements, returned as a lane mask.
; `%i slt %j` is the same predicate as `%j sgt %i`; sext turns each i1 result
; into an all-ones (true) or all-zeros (false) i32 lane.
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; 256-bit equality compare of thirty-two i8 elements, returned as a lane mask.
; sext turns each i1 result into an all-ones (equal) or all-zeros i8 lane.
define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; 256-bit equality compare of sixteen i16 elements, returned as a lane mask.
; sext turns each i1 result into an all-ones (equal) or all-zeros i16 lane.
define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; 256-bit signed compare of thirty-two i8 elements, returned as a lane mask.
; `%i slt %j` is the same predicate as `%j sgt %i`; sext turns each i1 result
; into an all-ones (true) or all-zeros (false) i8 lane.
define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; 256-bit signed compare of sixteen i16 elements, returned as a lane mask.
; `%i slt %j` is the same predicate as `%j sgt %i`; sext turns each i1 result
; into an all-ones (true) or all-zeros (false) i16 lane.
define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; 256-bit equality compare of eight i32 elements, returned as a lane mask.
; sext turns each i1 result into an all-ones (equal) or all-zeros i32 lane.
define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; 128-bit lane-wise integer add of two i64 elements (%i + %j).
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}
; 128-bit lane-wise integer add of four i32 elements (%i + %j).
define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}
; 128-bit lane-wise integer add of eight i16 elements (%i + %j).
define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit lane-wise integer add of sixteen i8 elements (%i + %j).
define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}
; 128-bit lane-wise integer subtract of two i64 elements (%i - %j).
define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}
; 128-bit lane-wise integer subtract of four i32 elements (%i - %j).
define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}
; 128-bit lane-wise integer subtract of eight i16 elements (%i - %j).
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit lane-wise integer subtract of sixteen i8 elements (%i - %j).
define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}
; 128-bit lane-wise integer multiply of eight i16 elements (%i * %j).
; `mul` keeps the low 16 bits of each product.
define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit signed compare of eight i16 elements, returned as a lane mask.
; `%i slt %j` is the same predicate as `%j sgt %i`; sext turns each i1 result
; into an all-ones (true) or all-zeros (false) i16 lane.
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; 128-bit signed compare of sixteen i8 elements, returned as a lane mask.
; `%i slt %j` is the same predicate as `%j sgt %i`; sext turns each i1 result
; into an all-ones (true) or all-zeros (false) i8 lane.
define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
; 128-bit equality compare of eight i16 elements, returned as a lane mask.
; sext turns each i1 result into an all-ones (equal) or all-zeros i16 lane.
define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; 128-bit equality compare of sixteen i8 elements, returned as a lane mask.
; sext turns each i1 result into an all-ones (equal) or all-zeros i8 lane.
define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}