; check AVX2 instructions that are disabled in case avx512VL/avx512BW present

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
; 256-bit integer AND; restored missing ret/} dropped by extraction.
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit integer AND; restored missing ret/} dropped by extraction.
define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 256-bit AND-NOT (andn pattern: and with a NOT-ed operand); restored missing ret/}.
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}
; 128-bit AND-NOT (andn pattern: and with a NOT-ed operand); restored missing ret/}.
define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}
; 256-bit integer OR; restored missing ret/} dropped by extraction.
define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 256-bit integer XOR; restored missing ret/} dropped by extraction.
define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit integer OR; restored missing ret/} dropped by extraction.
define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 128-bit integer XOR; restored missing ret/} dropped by extraction.
define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 256-bit i64 add; restored missing ret/} dropped by extraction.
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
; 256-bit i32 add; restored missing ret/} dropped by extraction.
define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
; 256-bit i16 add; restored missing ret/} dropped by extraction.
define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit i8 add; restored missing ret/} dropped by extraction.
define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
; 256-bit i64 sub; restored missing ret/} dropped by extraction.
define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
; 256-bit i32 sub; restored missing ret/} dropped by extraction.
define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
; 256-bit i16 sub; restored missing ret/} dropped by extraction.
define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit i8 sub; restored missing ret/} dropped by extraction.
define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
; 256-bit i16 multiply; restored missing ret/} dropped by extraction.
define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
; 256-bit i32 signed compare + sign-extend mask; restored missing ret/}.
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; 256-bit i8 equality compare + sign-extend mask; restored missing ret/}.
define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; 256-bit i16 equality compare + sign-extend mask; restored missing ret/}.
define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; 256-bit i8 signed compare + sign-extend mask; restored missing ret/}.
define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; 256-bit i16 signed compare + sign-extend mask; restored missing ret/}.
define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; 256-bit i32 equality compare + sign-extend mask; restored missing ret/}.
define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; 128-bit i64 add; restored missing ret/} dropped by extraction.
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}
; 128-bit i32 add; restored missing ret/} dropped by extraction.
define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}
; 128-bit i16 add; restored missing ret/} dropped by extraction.
define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit i8 add; restored missing ret/} dropped by extraction.
define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}
; 128-bit i64 sub; restored missing ret/} dropped by extraction.
define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}
; 128-bit i32 sub; restored missing ret/} dropped by extraction.
define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}
; 128-bit i16 sub; restored missing ret/} dropped by extraction.
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit i8 sub; restored missing ret/} dropped by extraction.
define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}
; 128-bit i16 multiply; restored missing ret/} dropped by extraction.
define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}
; 128-bit i16 signed compare + sign-extend mask; restored missing ret/}.
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; 128-bit i8 signed compare + sign-extend mask; restored missing ret/}.
define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
; 128-bit i16 equality compare + sign-extend mask; restored missing ret/}.
define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; 128-bit i8 equality compare + sign-extend mask; restored missing ret/}.
define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
; Cross-operand i16 shuffle (palignr-style pattern); restored missing closing brace.
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}
; 256-bit cross-operand i16 shuffle (per-lane palignr-style pattern); restored missing closing brace.
define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}
; 128-bit cross-operand i8 shuffle (palignr-style pattern); restored missing closing brace.
define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}
; 256-bit cross-operand i8 shuffle with undef lanes; restored missing closing brace.
define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}
; Cross-operand i64 shuffle; restored missing closing brace.
define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Cross-operand i32 shuffle; restored missing closing brace.
define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}
; 256-bit cross-operand i32 shuffle (per-lane rotate pattern); restored missing closing brace.
define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}
; Cross-operand f64 shuffle with mask 5,1,6,3; restored missing closing brace.
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
; f64 shuffle against zero, swapped via a bitcast-to-f32 shuffle and cast back;
; restored missing closing brace.
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}
266 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
267 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
268 ret <16 x i16> %shuffle