; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
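
; Codegen tests for the AArch64 NEON across-vector-lanes reduction
; intrinsics: ADDV, SADDLV/UADDLV, SMAXV/UMAXV, SMINV/UMINV, and the
; floating-point FMAXV/FMINV and FMAXNMV/FMINNMV forms.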
declare float @llvm.aarch64.neon.vminnmv(<4 x float>)
declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>)
declare float @llvm.aarch64.neon.vminv(<4 x float>)
declare float @llvm.aarch64.neon.vmaxv(<4 x float>)

declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>)
declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>)
declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>)
declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>)
declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>)

declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>)
declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>)
declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>)
declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>)
declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>)
declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>)
declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>)
declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>)
declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>)
declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>)

declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>)
declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>)
declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>)
declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>)
declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>)
declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>)
declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>)
declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>)
declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>)
declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>)

declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>)
declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>)
declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>)
declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>)
declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>)
declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>)
declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>)
declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>)
declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>)
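
; SADDLV/UADDLV: widening add across all lanes; the result element is
; twice the width of the source elements.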
define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i16> %saddlv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i32> %saddlv.i, i32 0
  ret i32 %0
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
  ret i32 %0
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i16> %saddlv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i32> %saddlv.i, i32 0
  ret i32 %0
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i64> %saddlv.i, i32 0
  ret i64 %0
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
  ret i32 %0
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i64> %uaddlv.i, i32 0
  ret i64 %0
}
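
; SMAXV/UMAXV: signed/unsigned maximum across all lanes.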
define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %smaxv.i, i32 0
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %smaxv.i, i32 0
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %umaxv.i, i32 0
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %umaxv.i, i32 0
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %smaxv.i, i32 0
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %smaxv.i, i32 0
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %smaxv.i, i32 0
  ret i32 %0
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %umaxv.i, i32 0
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %umaxv.i, i32 0
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %umaxv.i, i32 0
  ret i32 %0
}
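
; SMINV/UMINV: signed/unsigned minimum across all lanes.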
define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %sminv.i, i32 0
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %sminv.i, i32 0
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %uminv.i, i32 0
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %uminv.i, i32 0
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %sminv.i, i32 0
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %sminv.i, i32 0
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %sminv.i, i32 0
  ret i32 %0
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %uminv.i, i32 0
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %uminv.i, i32 0
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %uminv.i, i32 0
  ret i32 %0
}
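
; ADDV: integer add across all lanes; the signed and unsigned tests share
; one intrinsic and both select the same ADDV instruction.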
define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %vaddv.i, i32 0
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %vaddv.i, i32 0
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
  %0 = extractelement <1 x i8> %vaddv.i, i32 0
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
  %0 = extractelement <1 x i16> %vaddv.i, i32 0
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %vaddv.i, i32 0
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %vaddv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %vaddv.i, i32 0
  ret i32 %0
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
  %0 = extractelement <1 x i8> %vaddv.i, i32 0
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
  %0 = extractelement <1 x i16> %vaddv.i, i32 0
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
  %0 = extractelement <1 x i32> %vaddv.i, i32 0
  ret i32 %0
}
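
; Floating-point across-lane reductions: FMAXV/FMINV and the NaN-aware
; FMAXNMV/FMINNMV (IEEE 754 maxNum/minNum semantics).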
define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a)
  ret float %0
}