1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Vector lane-reverse tests (rev16/rev32/rev64). Each shufflevector mask is
; the permutation the corresponding REV instruction performs: rev16 swaps the
; bytes inside each 16-bit group, rev32 reverses elements inside each 32-bit
; group, rev64 reverses elements inside each 64-bit group. FileCheck verifies
; the shuffle is matched to the expected REV instruction and arrangement.
4 define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
5 ; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
6 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
7 ret <8 x i8> %shuffle.i
10 define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
11 ; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
12 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13 ret <16 x i8> %shuffle.i
16 define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
17 ; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
18 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
19 ret <8 x i8> %shuffle.i
22 define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
23 ; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
24 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
25 ret <4 x i16> %shuffle.i
28 define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
29 ; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
30 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
31 ret <16 x i8> %shuffle.i
34 define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
35 ; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
36 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
37 ret <8 x i16> %shuffle.i
40 define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
41 ; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
42 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
43 ret <8 x i8> %shuffle.i
46 define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
47 ; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
48 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
49 ret <4 x i16> %shuffle.i
52 define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
53 ; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
54 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
55 ret <2 x i32> %shuffle.i
; Same rev64 permutation, float element type — must still select rev64.2s.
58 define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
59 ; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
60 %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
61 ret <2 x float> %shuffle.i
64 define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
65 ; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
66 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
67 ret <16 x i8> %shuffle.i
70 define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
71 ; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
72 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
73 ret <8 x i16> %shuffle.i
76 define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
77 ; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
78 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
79 ret <4 x i32> %shuffle.i
82 define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
83 ; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
84 %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
85 ret <4 x float> %shuffle.i
; Pairwise add-long tests. The ARM-namespaced intrinsics
; llvm.arm.neon.vpaddls/vpaddlu (signed/unsigned widening pairwise add) must
; lower to AArch64 SADDLP/UADDLP with the matching source and destination
; arrangements (e.g. 8b -> 4h doubles the element width, halves the count).
88 define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
89 ; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
90 %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
91 ret <4 x i16> %vpaddl.i
94 define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
95 ; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
96 %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4
97 ret <2 x i32> %vpaddl1.i
100 define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
101 ; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
102 %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4
103 ret <1 x i64> %vpaddl1.i
106 define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
107 ; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
108 %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
109 ret <4 x i16> %vpaddl.i
112 define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
113 ; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
114 %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4
115 ret <2 x i32> %vpaddl1.i
118 define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
119 ; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
120 %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4
121 ret <1 x i64> %vpaddl1.i
124 define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
125 ; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
126 %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
127 ret <8 x i16> %vpaddl.i
130 define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
131 ; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
132 %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4
133 ret <4 x i32> %vpaddl1.i
136 define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
137 ; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
138 %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4
139 ret <2 x i64> %vpaddl1.i
142 define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
143 ; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
144 %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
145 ret <8 x i16> %vpaddl.i
148 define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
149 ; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
150 %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4
151 ret <4 x i32> %vpaddl1.i
154 define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
155 ; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
156 %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4
157 ret <2 x i64> %vpaddl1.i
; Pairwise add-accumulate-long tests: llvm.arm.neon.vpadals/vpadalu take an
; accumulator (%a) plus a narrower source (%b) and must lower to SADALP/UADALP.
160 define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
161 ; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
162 %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
163 ret <4 x i16> %vpadal1.i
166 define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
167 ; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
168 %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
169 ret <2 x i32> %vpadal2.i
172 define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
173 ; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
174 %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
175 ret <1 x i64> %vpadal2.i
178 define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
179 ; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
180 %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
181 ret <4 x i16> %vpadal1.i
184 define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
185 ; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
186 %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
187 ret <2 x i32> %vpadal2.i
190 define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
191 ; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
192 %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
193 ret <1 x i64> %vpadal2.i
196 define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
197 ; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
198 %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
199 ret <8 x i16> %vpadal1.i
202 define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
203 ; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
204 %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
205 ret <4 x i32> %vpadal2.i
208 define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
209 ; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
210 %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
211 ret <2 x i64> %vpadal2.i
214 define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
215 ; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
216 %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
217 ret <8 x i16> %vpadal1.i
220 define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
221 ; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
222 %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
223 ret <4 x i32> %vpadal2.i
226 define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
227 ; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
228 %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
229 ret <2 x i64> %vpadal2.i
; Saturating absolute-value tests: llvm.arm.neon.vqabs must lower to SQABS
; with the matching arrangement for every legal integer vector type.
232 define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
233 ; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
234 %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
235 ret <8 x i8> %vqabs.i
238 define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
239 ; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
240 %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
241 ret <16 x i8> %vqabs.i
244 define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
245 ; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
246 %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4
247 ret <4 x i16> %vqabs1.i
250 define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
251 ; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
252 %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4
253 ret <8 x i16> %vqabs1.i
256 define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
257 ; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
258 %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4
259 ret <2 x i32> %vqabs1.i
262 define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
263 ; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
264 %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4
265 ret <4 x i32> %vqabs1.i
268 define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
269 ; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
270 %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4
271 ret <2 x i64> %vqabs1.i
; Saturating negate tests: llvm.arm.neon.vqneg must lower to SQNEG.
274 define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
275 ; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
276 %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
277 ret <8 x i8> %vqneg.i
280 define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
281 ; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
282 %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
283 ret <16 x i8> %vqneg.i
286 define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
287 ; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
288 %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4
289 ret <4 x i16> %vqneg1.i
292 define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
293 ; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
294 %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4
295 ret <8 x i16> %vqneg1.i
298 define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
299 ; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
300 %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4
301 ret <2 x i32> %vqneg1.i
304 define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
305 ; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
306 %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4
307 ret <4 x i32> %vqneg1.i
310 define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
311 ; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
312 %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4
313 ret <2 x i64> %vqneg1.i
; Plain negate tests: `sub 0, x` must select NEG.
; NOTE(review): the `ret` lines of the integer vneg tests are not visible in
; this extract (lines elided); bodies left untouched.
316 define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
317 ; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
318 %sub.i = sub <8 x i8> zeroinitializer, %a
322 define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
323 ; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
324 %sub.i = sub <16 x i8> zeroinitializer, %a
328 define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
329 ; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
330 %sub.i = sub <4 x i16> zeroinitializer, %a
334 define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
335 ; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
336 %sub.i = sub <8 x i16> zeroinitializer, %a
340 define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
341 ; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
342 %sub.i = sub <2 x i32> zeroinitializer, %a
346 define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
347 ; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
348 %sub.i = sub <4 x i32> zeroinitializer, %a
352 define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
353 ; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
354 %sub.i = sub <2 x i64> zeroinitializer, %a
; FP negate tests: `fsub -0.0, x` must select FNEG (fsub from -0.0, not 0.0,
; so the sign of zero and NaN payloads are handled per IEEE negate semantics).
358 define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
359 ; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
360 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
361 ret <2 x float> %sub.i
364 define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
365 ; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
366 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
367 ret <4 x float> %sub.i
370 define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
371 ; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
372 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
373 ret <2 x double> %sub.i
; Absolute-value tests: llvm.arm.neon.vabs must lower to ABS (integer) and
; llvm.fabs.* to FABS (floating point) with the matching arrangement.
; NOTE(review): the `ret` of test_vabs_s8 is not visible in this extract.
376 define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
377 ; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
378 %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
382 define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
383 ; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
384 %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
385 ret <16 x i8> %vabs.i
388 define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
389 ; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
390 %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4
391 ret <4 x i16> %vabs1.i
394 define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
395 ; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
396 %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4
397 ret <8 x i16> %vabs1.i
400 define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
401 ; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
402 %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4
403 ret <2 x i32> %vabs1.i
406 define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
407 ; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
408 %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4
409 ret <4 x i32> %vabs1.i
412 define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
413 ; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
414 %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4
415 ret <2 x i64> %vabs1.i
418 define <2 x float> @test_vabs_f32(<2 x float> %a) #1 {
419 ; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
420 %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4
421 ret <2 x float> %vabs1.i
424 define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 {
425 ; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
426 %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4
427 ret <4 x float> %vabs1.i
430 define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 {
431 ; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
432 %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4
433 ret <2 x double> %vabs1.i
; Signed-saturating add of unsigned value: the aarch64-namespaced
; llvm.aarch64.neon.suqadd intrinsic must select SUQADD.
436 define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
437 ; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
438 %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
439 ret <8 x i8> %vuqadd.i
442 define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
443 ; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
444 %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
445 ret <16 x i8> %vuqadd.i
448 define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
449 ; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
450 %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
451 ret <4 x i16> %vuqadd2.i
454 define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
455 ; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
456 %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
457 ret <8 x i16> %vuqadd2.i
460 define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
461 ; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
462 %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
463 ret <2 x i32> %vuqadd2.i
466 define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
467 ; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
468 %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
469 ret <4 x i32> %vuqadd2.i
472 define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
473 ; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
474 %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
475 ret <2 x i64> %vuqadd2.i
; Bit-counting tests: llvm.arm.neon.vcls -> CLS (count leading sign bits),
; llvm.ctlz (is_zero_undef=false) -> CLZ, llvm.ctpop -> CNT.
; NOTE(review): the `ret` of test_vcls_s8 and test_vclz_s8 are not visible in
; this extract (lines elided); bodies left untouched.
478 define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
479 ; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
480 %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4
484 define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
485 ; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
486 %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4
487 ret <16 x i8> %vcls.i
490 define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
491 ; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
492 %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4
493 ret <4 x i16> %vcls1.i
496 define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
497 ; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
498 %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4
499 ret <8 x i16> %vcls1.i
502 define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
503 ; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
504 %vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4
505 ret <2 x i32> %vcls1.i
508 define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
509 ; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
510 %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4
511 ret <4 x i32> %vcls1.i
514 define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
515 ; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
516 %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
520 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
521 ; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
522 %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
523 ret <16 x i8> %vclz.i
526 define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
527 ; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
528 %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4
529 ret <4 x i16> %vclz1.i
532 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
533 ; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
534 %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4
535 ret <8 x i16> %vclz1.i
538 define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
539 ; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
540 %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4
541 ret <2 x i32> %vclz1.i
544 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
545 ; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
546 %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4
547 ret <4 x i32> %vclz1.i
550 define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
551 ; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
552 %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
553 ret <8 x i8> %vctpop.i
556 define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
557 ; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
558 %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
559 ret <16 x i8> %vctpop.i
; Bitwise-NOT tests: `xor x, -1` must select NOT. The instruction is
; byte-granular, so wider element types still check the .8b/.16b arrangement.
; NOTE(review): the `ret` lines of the vmvn tests are not visible in this
; extract (lines elided); bodies left untouched.
562 define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
563 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
564 %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
568 define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
569 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
570 %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
574 define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
575 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
576 %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
580 define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
581 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
582 %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
586 define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
587 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
588 %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1>
592 define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
593 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
594 %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
; Bit-reverse tests: llvm.aarch64.neon.rbit must select RBIT.
598 define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
599 ; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
600 %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4
601 ret <8 x i8> %vrbit.i
604 define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
605 ; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
606 %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4
607 ret <16 x i8> %vrbit.i
; Narrowing-move tests: a plain `trunc` must select XTN, and the _high
; variants (trunc of %b concatenated after %a by the identity shufflevector)
; must select XTN2, which writes the upper half of the destination register.
610 define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
611 ; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
612 %vmovn.i = trunc <8 x i16> %a to <8 x i8>
613 ret <8 x i8> %vmovn.i
616 define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
617 ; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
618 %vmovn.i = trunc <4 x i32> %a to <4 x i16>
619 ret <4 x i16> %vmovn.i
622 define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
623 ; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
624 %vmovn.i = trunc <2 x i64> %a to <2 x i32>
625 ret <2 x i32> %vmovn.i
628 define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
629 ; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
630 %vmovn.i.i = trunc <8 x i16> %b to <8 x i8>
631 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
632 ret <16 x i8> %shuffle.i
635 define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
636 ; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
637 %vmovn.i.i = trunc <4 x i32> %b to <4 x i16>
638 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
639 ret <8 x i16> %shuffle.i
642 define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
643 ; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
644 %vmovn.i.i = trunc <2 x i64> %b to <2 x i32>
645 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
646 ret <4 x i32> %shuffle.i
; Saturating signed-to-unsigned narrow: llvm.arm.neon.vqmovnsu -> SQXTUN,
; and SQXTUN2 for the _high concatenation pattern.
649 define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
650 ; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
651 %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4
652 ret <8 x i8> %vqdmull1.i
655 define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
656 ; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
657 %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4
658 ret <4 x i16> %vqdmull1.i
661 define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
662 ; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
663 %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4
664 ret <2 x i32> %vqdmull1.i
667 define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
668 ; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
669 %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4
670 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
671 ret <16 x i8> %shuffle.i
674 define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
675 ; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
676 %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4
677 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
678 ret <8 x i16> %shuffle.i
681 define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
682 ; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
683 %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4
684 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
685 ret <4 x i32> %shuffle.i
; Saturating signed narrow: llvm.arm.neon.vqmovns -> SQXTN / SQXTN2.
688 define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
689 ; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
690 %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4
691 ret <8 x i8> %vqmovn1.i
694 define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
695 ; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
696 %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4
697 ret <4 x i16> %vqmovn1.i
700 define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
701 ; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
702 %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4
703 ret <2 x i32> %vqmovn1.i
706 define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
707 ; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
708 %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4
709 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
710 ret <16 x i8> %shuffle.i
; High-half saturating signed narrow (s32 -> s16). Must select SQXTN2.
; FIX(review): the CHECK line previously matched only the function label
; ("test_vqmovn_high_s32"), which any output trivially satisfies, so this
; test verified nothing about codegen. Check the SQXTN2 instruction with its
; arrangements instead, matching the sibling test_vqmovn_high_s16 pattern.
713 define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
714 ; CHECK: sqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
715 %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4
716 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
717 ret <8 x i16> %shuffle.i
; High-half saturating signed narrow (s64 -> s32). Must select SQXTN2.
; FIX(review): as with test_vqmovn_high_s32, the CHECK line only matched the
; function label and thus asserted nothing about the selected instruction.
; Check the SQXTN2 instruction with its 4s/2d arrangements instead.
720 define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
721 ; CHECK: sqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
722 %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4
723 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
724 ret <4 x i32> %shuffle.i
; Saturating unsigned narrow: llvm.arm.neon.vqmovnu -> UQXTN / UQXTN2.
727 define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
728 ; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
729 %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4
730 ret <8 x i8> %vqmovn1.i
733 define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
734 ; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
735 %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4
736 ret <4 x i16> %vqmovn1.i
739 define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
740 ; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
741 %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4
742 ret <2 x i32> %vqmovn1.i
745 define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
746 ; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
747 %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4
748 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
749 ret <16 x i8> %shuffle.i
752 define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
753 ; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
754 %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4
755 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
756 ret <8 x i16> %shuffle.i
759 define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
760 ; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
761 %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4
762 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
763 ret <4 x i32> %shuffle.i
; Shift-left-long tests: extend (sext/zext) followed by a shift equal to the
; source element width must select SHLL (which implicitly shifts by the
; element size), and SHLL2 when the source is the extracted high half.
766 define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
767 ; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
768 %1 = sext <8 x i8> %a to <8 x i16>
769 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
770 ret <8 x i16> %vshll_n
773 define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
774 ; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
775 %1 = sext <4 x i16> %a to <4 x i32>
776 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
777 ret <4 x i32> %vshll_n
780 define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
781 ; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
782 %1 = sext <2 x i32> %a to <2 x i64>
783 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
784 ret <2 x i64> %vshll_n
787 define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
788 ; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
789 %1 = zext <8 x i8> %a to <8 x i16>
790 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
791 ret <8 x i16> %vshll_n
794 define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
795 ; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
796 %1 = zext <4 x i16> %a to <4 x i32>
797 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
798 ret <4 x i32> %vshll_n
801 define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
802 ; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
803 %1 = zext <2 x i32> %a to <2 x i64>
804 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
805 ret <2 x i64> %vshll_n
808 define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
809 ; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
810 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
811 %1 = sext <8 x i8> %shuffle.i to <8 x i16>
812 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
813 ret <8 x i16> %vshll_n
816 define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
817 ; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
818 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
819 %1 = sext <4 x i16> %shuffle.i to <4 x i32>
820 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
821 ret <4 x i32> %vshll_n
824 define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
825 ; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
826 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
827 %1 = sext <2 x i32> %shuffle.i to <2 x i64>
828 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
829 ret <2 x i64> %vshll_n
832 define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
833 ; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
834 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
835 %1 = zext <8 x i8> %shuffle.i to <8 x i16>
836 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
837 ret <8 x i16> %vshll_n
840 define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
841 ; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
842 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
843 %1 = zext <4 x i16> %shuffle.i to <4 x i32>
844 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
845 ret <4 x i32> %vshll_n
848 define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
849 ; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
850 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
851 %1 = zext <2 x i32> %shuffle.i to <2 x i64>
852 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
853 ret <2 x i64> %vshll_n
856 define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 {
857 ; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
858 %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4
859 ret <4 x i16> %vcvt1.i
862 define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 {
863 ; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
864 %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4
865 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
866 ret <8 x i16> %shuffle.i
869 define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 {
870 ; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h
871 %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4
872 ret <4 x float> %vcvt1.i
875 define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 {
876 ; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
877 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
878 %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4
879 ret <4 x float> %vcvt1.i.i
882 define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
883 ; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
884 %vcvt.i = fptrunc <2 x double> %a to <2 x float>
885 ret <2 x float> %vcvt.i
888 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
889 ; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
890 %vcvt.i.i = fptrunc <2 x double> %b to <2 x float>
891 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
892 ret <4 x float> %shuffle.i
895 define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
896 ; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
897 %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4
898 ret <2 x float> %vcvtx_f32_f641.i
901 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
902 ; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
903 %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4
904 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
905 ret <4 x float> %shuffle.i
908 define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
909 ; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
910 %vcvt.i = fpext <2 x float> %a to <2 x double>
911 ret <2 x double> %vcvt.i
914 define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
915 ; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
916 %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
917 %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double>
918 ret <2 x double> %vcvt.i.i
921 define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
922 ; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
923 %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4
924 ret <2 x float> %vrndn1.i
927 define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
928 ; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
929 %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4
930 ret <4 x float> %vrndn1.i
933 define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
934 ; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
935 %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4
936 ret <2 x double> %vrndn1.i
939 define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
940 ; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
941 %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4
942 ret <2 x float> %vrnda1.i
945 define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
946 ; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
947 %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4
948 ret <4 x float> %vrnda1.i
951 define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
952 ; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
953 %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4
954 ret <2 x double> %vrnda1.i
957 define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
958 ; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
959 %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4
960 ret <2 x float> %vrndp1.i
963 define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
964 ; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
965 %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4
966 ret <4 x float> %vrndp1.i
969 define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
970 ; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
971 %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4
972 ret <2 x double> %vrndp1.i
975 define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
976 ; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
977 %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4
978 ret <2 x float> %vrndm1.i
981 define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
982 ; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
983 %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4
984 ret <4 x float> %vrndm1.i
987 define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
988 ; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
989 %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4
990 ret <2 x double> %vrndm1.i
993 define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
994 ; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
995 %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4
996 ret <2 x float> %vrndx1.i
999 define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
1000 ; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1001 %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4
1002 ret <4 x float> %vrndx1.i
1005 define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
1006 ; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1007 %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4
1008 ret <2 x double> %vrndx1.i
1011 define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
1012 ; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1013 %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4
1014 ret <2 x float> %vrnd1.i
1017 define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
1018 ; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1019 %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4
1020 ret <4 x float> %vrnd1.i
1023 define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
1024 ; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1025 %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4
1026 ret <2 x double> %vrnd1.i
1029 define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
1030 ; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1031 %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4
1032 ret <2 x float> %vrndi1.i
1035 define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
1036 ; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1037 %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4
1038 ret <4 x float> %vrndi1.i
1041 define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
1042 ; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1043 %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4
1044 ret <2 x double> %vrndi1.i
1047 define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
1048 ; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1049 %vcvt.i = fptosi <2 x float> %a to <2 x i32>
1050 ret <2 x i32> %vcvt.i
1053 define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
1054 ; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1055 %vcvt.i = fptosi <4 x float> %a to <4 x i32>
1056 ret <4 x i32> %vcvt.i
1059 define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
1060 ; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1061 %vcvt.i = fptosi <2 x double> %a to <2 x i64>
1062 ret <2 x i64> %vcvt.i
1065 define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
1066 ; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1067 %vcvt.i = fptoui <2 x float> %a to <2 x i32>
1068 ret <2 x i32> %vcvt.i
1071 define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
1072 ; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1073 %vcvt.i = fptoui <4 x float> %a to <4 x i32>
1074 ret <4 x i32> %vcvt.i
1077 define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
1078 ; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1079 %vcvt.i = fptoui <2 x double> %a to <2 x i64>
1080 ret <2 x i64> %vcvt.i
1083 define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
1084 ; CHECK-LABEL: test_vcvtn_s32_f32
1085 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1086 %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a)
1087 ret <2 x i32> %vcvtns_f321.i
1090 define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
1091 ; CHECK-LABEL: test_vcvtnq_s32_f32
1092 ; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1093 %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a)
1094 ret <4 x i32> %vcvtns_f321.i
1097 define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) {
1098 ; CHECK-LABEL: test_vcvtnq_s64_f64
1099 ; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1100 %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a)
1101 ret <2 x i64> %vcvtns_f641.i
1104 define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) {
1105 ; CHECK-LABEL: test_vcvtn_u32_f32
1106 ; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1107 %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a)
1108 ret <2 x i32> %vcvtnu_f321.i
1111 define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
1112 ; CHECK-LABEL: test_vcvtnq_u32_f32
1113 ; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1114 %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a)
1115 ret <4 x i32> %vcvtnu_f321.i
1118 define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) {
1119 ; CHECK-LABEL: test_vcvtnq_u64_f64
1120 ; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1121 %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a)
1122 ret <2 x i64> %vcvtnu_f641.i
1125 define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) {
1126 ; CHECK-LABEL: test_vcvtp_s32_f32
1127 ; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1128 %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a)
1129 ret <2 x i32> %vcvtps_f321.i
1132 define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
1133 ; CHECK-LABEL: test_vcvtpq_s32_f32
1134 ; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1135 %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a)
1136 ret <4 x i32> %vcvtps_f321.i
1139 define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) {
1140 ; CHECK-LABEL: test_vcvtpq_s64_f64
1141 ; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1142 %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a)
1143 ret <2 x i64> %vcvtps_f641.i
1146 define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) {
1147 ; CHECK-LABEL: test_vcvtp_u32_f32
1148 ; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1149 %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a)
1150 ret <2 x i32> %vcvtpu_f321.i
1153 define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
1154 ; CHECK-LABEL: test_vcvtpq_u32_f32
1155 ; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1156 %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a)
1157 ret <4 x i32> %vcvtpu_f321.i
1160 define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) {
1161 ; CHECK-LABEL: test_vcvtpq_u64_f64
1162 ; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1163 %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a)
1164 ret <2 x i64> %vcvtpu_f641.i
1167 define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) {
1168 ; CHECK-LABEL: test_vcvtm_s32_f32
1169 ; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1170 %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a)
1171 ret <2 x i32> %vcvtms_f321.i
1174 define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
1175 ; CHECK-LABEL: test_vcvtmq_s32_f32
1176 ; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1177 %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a)
1178 ret <4 x i32> %vcvtms_f321.i
1181 define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) {
1182 ; CHECK-LABEL: test_vcvtmq_s64_f64
1183 ; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1184 %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a)
1185 ret <2 x i64> %vcvtms_f641.i
1188 define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) {
1189 ; CHECK-LABEL: test_vcvtm_u32_f32
1190 ; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1191 %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a)
1192 ret <2 x i32> %vcvtmu_f321.i
1195 define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
1196 ; CHECK-LABEL: test_vcvtmq_u32_f32
1197 ; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1198 %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a)
1199 ret <4 x i32> %vcvtmu_f321.i
1202 define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) {
1203 ; CHECK-LABEL: test_vcvtmq_u64_f64
1204 ; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1205 %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a)
1206 ret <2 x i64> %vcvtmu_f641.i
1209 define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) {
1210 ; CHECK-LABEL: test_vcvta_s32_f32
1211 ; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1212 %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a)
1213 ret <2 x i32> %vcvtas_f321.i
1216 define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
1217 ; CHECK-LABEL: test_vcvtaq_s32_f32
1218 ; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1219 %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a)
1220 ret <4 x i32> %vcvtas_f321.i
1223 define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) {
1224 ; CHECK-LABEL: test_vcvtaq_s64_f64
1225 ; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1226 %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a)
1227 ret <2 x i64> %vcvtas_f641.i
1230 define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) {
1231 ; CHECK-LABEL: test_vcvta_u32_f32
1232 ; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1233 %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a)
1234 ret <2 x i32> %vcvtau_f321.i
1237 define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
1238 ; CHECK-LABEL: test_vcvtaq_u32_f32
1239 ; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1240 %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a)
1241 ret <4 x i32> %vcvtau_f321.i
1244 define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) {
1245 ; CHECK-LABEL: test_vcvtaq_u64_f64
1246 ; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1247 %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a)
1248 ret <2 x i64> %vcvtau_f641.i
1251 define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
1252 ; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1253 %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4
1254 ret <2 x float> %vrsqrte1.i
1257 define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
1258 ; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1259 %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4
1260 ret <4 x float> %vrsqrte1.i
1263 define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
1264 ; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1265 %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4
1266 ret <2 x double> %vrsqrte1.i
1269 define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
1270 ; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1271 %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4
1272 ret <2 x float> %vrecpe1.i
1275 define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
1276 ; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1277 %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4
1278 ret <4 x float> %vrecpe1.i
1281 define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
1282 ; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1283 %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4
1284 ret <2 x double> %vrecpe1.i
1287 define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
1288 ; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1289 %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4
1290 ret <2 x i32> %vrecpe1.i
1293 define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
1294 ; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1295 %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4
1296 ret <4 x i32> %vrecpe1.i
1299 define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
1300 ; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1301 %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
1302 ret <2 x float> %vsqrt1.i
1305 define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
1306 ; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1307 %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
1308 ret <4 x float> %vsqrt1.i
1311 define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
1312 ; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1313 %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
1314 ret <2 x double> %vsqrt1.i
1317 define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
1318 ; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1319 %vcvt.i = sitofp <2 x i32> %a to <2 x float>
1320 ret <2 x float> %vcvt.i
1323 define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
1324 ; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1325 %vcvt.i = uitofp <2 x i32> %a to <2 x float>
1326 ret <2 x float> %vcvt.i
1329 define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
1330 ; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1331 %vcvt.i = sitofp <4 x i32> %a to <4 x float>
1332 ret <4 x float> %vcvt.i
1335 define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
1336 ; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1337 %vcvt.i = uitofp <4 x i32> %a to <4 x float>
1338 ret <4 x float> %vcvt.i
1341 define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
1342 ; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1343 %vcvt.i = sitofp <2 x i64> %a to <2 x double>
1344 ret <2 x double> %vcvt.i
1347 define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
1348 ; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1349 %vcvt.i = uitofp <2 x i64> %a to <2 x double>
1350 ret <2 x double> %vcvt.i
1353 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
1355 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
1357 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
1359 declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
1361 declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2
1363 declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2
1365 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2
1367 declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2
1369 declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2
1371 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2
1373 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2
1375 declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>)
1377 declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>)
1379 declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>)
1381 declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>)
1383 declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>)
1385 declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>)
1387 declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>)
1389 declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>)
1391 declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>)
1393 declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>)
1395 declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>)
1397 declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>)
1399 declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>)
1401 declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>)
1403 declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>)
1405 declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>)
1407 declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>)
1409 declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>)
1411 declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>)
1413 declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>)
1415 declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>)
1417 declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>)
1419 declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>)
1421 declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>)
1423 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3
1425 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3
1427 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3
1429 declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3
1431 declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3
1433 declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3
1435 declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3
1437 declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3
1439 declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3
1441 declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3
1443 declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3
1445 declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3
1447 declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3
1449 declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3
1451 declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3
1453 declare <2 x double> @llvm.round.v2f64(<2 x double>) #3
1455 declare <4 x float> @llvm.round.v4f32(<4 x float>) #3
1457 declare <2 x float> @llvm.round.v2f32(<2 x float>) #3
1459 declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2
1461 declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
1463 declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
1465 declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2
1467 declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
1469 declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2
1471 declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2
1473 declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2
1475 declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2
1477 declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2
1479 declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2
1481 declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2
1483 declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2
1485 declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2
1487 declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2
1489 declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2
1491 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2
1493 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2
1495 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2
1497 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2
1499 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2
1501 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2
1503 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2
1505 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2
1507 declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2
1509 declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2
1511 declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2
1513 declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2
1515 declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2
1517 declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2
1519 declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2
1521 declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2
1523 declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2
1525 declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2
1527 declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2
1529 declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2
1531 declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2
1533 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3
1535 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3
1537 declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3
1539 declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2
1541 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2
1543 declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2
1545 declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2
1547 declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2
1549 declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2
1551 declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2
1553 declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2
1555 declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2
1557 declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2
1559 declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2
1561 declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2
1563 declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2
1565 declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2
1567 declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2
1569 declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2
1571 declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2
1573 declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2
1575 declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2
1577 declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2
1579 declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2
1581 declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
1583 declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
1585 declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
1587 declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
1589 declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
1591 declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
1593 declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
1595 declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
1597 declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
1599 declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
1601 declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
1603 declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
1605 declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2
1607 declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2
1609 declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2
1611 declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2
1613 declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2
1615 declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2
1617 declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2
1619 declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2
1621 declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2
1623 declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2
1625 declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2
1627 declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2
1629 declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2
1631 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
1634 define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
1635 ; CHECK-LABEL: test_vcvt_s64_f64
1636 ; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
1637 %1 = fptosi <1 x double> %a to <1 x i64>
1641 define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
1642 ; CHECK-LABEL: test_vcvt_u64_f64
1643 ; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
1644 %1 = fptoui <1 x double> %a to <1 x i64>
1648 define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
1649 ; CHECK-LABEL: test_vcvtn_s64_f64
1650 ; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
1651 %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a)
1655 define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
1656 ; CHECK-LABEL: test_vcvtn_u64_f64
1657 ; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
1658 %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a)
1662 define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
1663 ; CHECK-LABEL: test_vcvtp_s64_f64
1664 ; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
1665 %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a)
1669 define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
1670 ; CHECK-LABEL: test_vcvtp_u64_f64
1671 ; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
1672 %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a)
1676 define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
1677 ; CHECK-LABEL: test_vcvtm_s64_f64
1678 ; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
1679 %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a)
1683 define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
1684 ; CHECK-LABEL: test_vcvtm_u64_f64
1685 ; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
1686 %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a)
1690 define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
1691 ; CHECK-LABEL: test_vcvta_s64_f64
1692 ; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
1693 %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a)
1697 define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
1698 ; CHECK-LABEL: test_vcvta_u64_f64
1699 ; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
1700 %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a)
1704 define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
1705 ; CHECK-LABEL: test_vcvt_f64_s64
1706 ; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
1707 %1 = sitofp <1 x i64> %a to <1 x double>
1711 define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
1712 ; CHECK-LABEL: test_vcvt_f64_u64
1713 ; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
1714 %1 = uitofp <1 x i64> %a to <1 x double>
; Declarations for the (legacy ARM-named) rounding f64 -> i64 conversion
; intrinsics exercised by the tests above.
1718 declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>)
1719 declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)
1720 declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)
1721 declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)
1722 declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)
1723 declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)
1724 declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)
1725 declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)
; Round f64 to nearest integral value (ties to even): FRINTN.
1727 define <1 x double> @test_vrndn_f64(<1 x double> %a) {
1728 ; CHECK-LABEL: test_vrndn_f64
1729 ; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
1730 %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
; llvm.round (ties away from zero) on v1f64 must select FRINTA.
1734 define <1 x double> @test_vrnda_f64(<1 x double> %a) {
1735 ; CHECK-LABEL: test_vrnda_f64
1736 ; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
1737 %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
; llvm.ceil (round toward +infinity) on v1f64 must select FRINTP.
1741 define <1 x double> @test_vrndp_f64(<1 x double> %a) {
1742 ; CHECK-LABEL: test_vrndp_f64
1743 ; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
1744 %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
; llvm.floor (round toward -infinity) on v1f64 must select FRINTM.
1748 define <1 x double> @test_vrndm_f64(<1 x double> %a) {
1749 ; CHECK-LABEL: test_vrndm_f64
1750 ; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
1751 %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
; llvm.rint (round using current mode, may signal inexact) must select FRINTX.
1755 define <1 x double> @test_vrndx_f64(<1 x double> %a) {
1756 ; CHECK-LABEL: test_vrndx_f64
1757 ; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
1758 %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
; llvm.trunc (round toward zero) on v1f64 must select FRINTZ.
1762 define <1 x double> @test_vrnd_f64(<1 x double> %a) {
1763 ; CHECK-LABEL: test_vrnd_f64
1764 ; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
1765 %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
; llvm.nearbyint (current rounding mode, no inexact exception) must select FRINTI.
1769 define <1 x double> @test_vrndi_f64(<1 x double> %a) {
1770 ; CHECK-LABEL: test_vrndi_f64
1771 ; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
1772 %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
; Declarations for the v1f64 rounding intrinsics exercised by the FRINT tests.
1776 declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
1777 declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
1778 declare <1 x double> @llvm.rint.v1f64(<1 x double>)
1779 declare <1 x double> @llvm.floor.v1f64(<1 x double>)
1780 declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
1781 declare <1 x double> @llvm.round.v1f64(<1 x double>)
1782 declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
; Reciprocal square-root estimate on v1f64 must select scalar FRSQRTE.
1784 define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
1785 ; CHECK-LABEL: test_vrsqrte_f64
1786 ; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
1787 %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
; Reciprocal estimate on v1f64 must select scalar FRECPE.
1791 define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
1792 ; CHECK-LABEL: test_vrecpe_f64
1793 ; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
1794 %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
; llvm.sqrt on v1f64 must select the scalar FSQRT (d-register) form.
1798 define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
1799 ; CHECK-LABEL: test_vsqrt_f64
1800 ; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
1801 %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
; Newton-Raphson reciprocal step (two operands) must select scalar FRECPS.
1805 define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
1806 ; CHECK-LABEL: test_vrecps_f64
1807 ; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
1808 %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
; Newton-Raphson reciprocal square-root step must select scalar FRSQRTS.
1812 define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
1813 ; CHECK-LABEL: test_vrsqrts_f64
1814 ; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
1815 %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
; Declarations for the v1f64 reciprocal/square-root estimate and step intrinsics.
1819 declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
1820 declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
1821 declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
1822 declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
1823 declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
; Signed long add-across of a 2 x i32 vector: the v1i64 reduction is just one
; pairwise add, so codegen should emit SADDLP (not a SADDLV reduction).
1825 define i64 @test_vaddlv_s32(<2 x i32> %a) {
1826 ; CHECK-LABEL: test_vaddlv_s32
1827 ; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
1828 %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a)
1829 %2 = extractelement <1 x i64> %1, i32 0
; Unsigned variant of the above: the 2 x i32 long reduction should emit UADDLP.
1833 define i64 @test_vaddlv_u32(<2 x i32> %a) {
1834 ; CHECK-LABEL: test_vaddlv_u32
1835 ; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
1836 %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a)
1837 %2 = extractelement <1 x i64> %1, i32 0
; Declarations for the long add-across-vector reduction intrinsics.
1841 declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>)
1842 declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>)