; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
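
; Signed shift right by immediate: an 'ashr' of a vector by a splatted
; constant should select SSHR.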
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

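; Unsigned shift right by immediate: 'lshr' by a splatted constant selects
; USHR.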
define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

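; Shift right and accumulate: a shift right followed by an add of the other
; argument should fold into SSRA (signed) or USRA (unsigned).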
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

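; Rounding shift right by immediate, via the vsrshr/vurshr intrinsics:
; selects SRSHR (signed) and URSHR (unsigned).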
define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) {
; CHECK: test_vrshr_n_s8
; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}

define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) {
; CHECK: test_vrshr_n_s16
; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}

define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) {
; CHECK: test_vrshr_n_s32
; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}

define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_s8
; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}

define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_s16
; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}

define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_s32
; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}

define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_s64
; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}

define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) {
; CHECK: test_vrshr_n_u8
; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}

define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) {
; CHECK: test_vrshr_n_u16
; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}

define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) {
; CHECK: test_vrshr_n_u32
; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}

define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_u8
; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}

define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_u16
; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}

define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_u32
; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}

define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_u64
; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}

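; Rounding shift right and accumulate: the rounding-shift intrinsic followed
; by an add should fold into SRSRA/URSRA.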
define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_s8
; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_s16
; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_s32
; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_s8
; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_s16
; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_s32
; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_s64
; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_u8
; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_u16
; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_u32
; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_u8
; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_u16
; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_u32
; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_u64
; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

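; Shift right and insert: SRI shifts each element of the second operand
; right and inserts the result, leaving the top #shift bits of each
; destination element unchanged.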
define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_s8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}

define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_s16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsri
}

define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsri_n_s32
; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsri
}

define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_s8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}

define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_s16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsri
}

define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsriq_n_s32
; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsri
}

define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsriq_n_s64
; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsri
}

define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_p8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}

define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_p16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsri
}

define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_p8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}

define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_p16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsri
}

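; Shift left and insert: SLI is the left-shifting counterpart of SRI,
; preserving the low #shift bits of each destination element.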
define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_s8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_s16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsli
}

define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsli_n_s32
; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsli
}

define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_s8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_s16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsli
}

define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsliq_n_s32
; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsli
}

define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsliq_n_s64
; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsli
}

define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_p8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_p16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsli
}

define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_p8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_p16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsli
}

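; Saturating shift left by immediate, via the vqshifts/vqshiftu intrinsics
; with a splatted shift amount: selects SQSHL (signed) and UQSHL (unsigned).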
define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
; CHECK: test_vqshl_n_s8
; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl
}

define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
; CHECK: test_vqshl_n_s16
; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}

define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
; CHECK: test_vqshl_n_s32
; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}

define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_s8
; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}

define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_s16
; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}

define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_s32
; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}

define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_s64
; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}

define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
; CHECK: test_vqshl_n_u8
; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl_n
}

define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
; CHECK: test_vqshl_n_u16
; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}

define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
; CHECK: test_vqshl_n_u32
; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}

define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_u8
; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}

define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_u16
; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}

define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_u32
; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}

define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_u64
; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}

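; Signed saturating shift left with unsigned result (SQSHLU), via the
; vsqshlu intrinsics.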
define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) {
; CHECK: test_vqshlu_n_s8
; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vqshlu
}

define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) {
; CHECK: test_vqshlu_n_s16
; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vqshlu
}

define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) {
; CHECK: test_vqshlu_n_s32
; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vqshlu
}

define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshluq_n_s8
; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vqshlu
}

define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshluq_n_s16
; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vqshlu
}

define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshluq_n_s32
; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vqshlu
}

define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshluq_n_s64
; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vqshlu
}

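; Shift right narrow (SHRN): a shift right followed by a 'trunc' to
; half-width elements.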
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

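; SHRN2 writes the upper half of the destination; the IR concatenates the
; narrowed result onto the existing low half via bitcasts and a
; shufflevector.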
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

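; Signed saturating shift right, unsigned narrow: SQSHRUN and its SQSHRUN2
; high form.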
define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrun_n_s16
; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrun
}

define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrun_n_s32
; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrun
}

define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrun_n_s64
; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrun
}

define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

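; Rounding shift right narrow: RSHRN and its RSHRN2 high form.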
define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrn_n_s16
; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vrshrn
}

define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrn_n_s32
; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vrshrn
}

define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrn_n_s64
; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vrshrn
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

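; Signed saturating rounding shift right, unsigned narrow: SQRSHRUN and its
; SQRSHRUN2 high form.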
define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrun_n_s16
; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrun
}

define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrun_n_s32
; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrun
}

define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrun_n_s64
; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrun
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

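; Saturating shift right narrow: SQSHRN (signed), UQSHRN (unsigned), and
; their SQSHRN2/UQSHRN2 high forms.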
define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_s16
; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}

define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_s32
; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}

define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_s64
; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}

define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_u16
; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}

define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_u32
; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}

define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_u64
; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

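; Saturating rounding shift right narrow: SQRSHRN/UQRSHRN and their
; SQRSHRN2/UQRSHRN2 high forms.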
define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_s16
; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}

define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_s32
; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}

define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_s64
; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}

define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_u16
; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}

define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_u32
; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}

define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_u64
; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

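; Fixed-point to floating-point conversion with #fracbits: SCVTF (signed)
; and UCVTF (unsigned).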
define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_s64
; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_u64
; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

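; Floating-point to fixed-point conversion, rounding toward zero: FCVTZS
; (signed) and FCVTZU (unsigned).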
define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_s64_f64
; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_u64_f64
; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

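; Intrinsic declarations for the tests above.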
declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32)
declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

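; Scalar <1 x double> / <1 x i64> fixed-point conversions select the
; d-register forms of the conversion instructions.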
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)