1 ; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
4 define <8 x i8> @vmov_8xi8() nounwind {
5 ; CHECK: vmov.i8 d16, #0x8 @ encoding: [0x18,0x0e,0xc0,0xf2]
6 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
10 define <4 x i16> @vmov_4xi16a() nounwind {
11 ; CHECK: vmov.i16 d16, #0x10 @ encoding: [0x10,0x08,0xc1,0xf2]
12 ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
16 define <4 x i16> @vmov_4xi16b() nounwind {
17 ; CHECK: vmov.i16 d16, #0x1000 @ encoding: [0x10,0x0a,0xc1,0xf2]
18 ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
22 define <2 x i32> @vmov_2xi32a() nounwind {
23 ; CHECK: vmov.i32 d16, #0x20 @ encoding: [0x10,0x00,0xc2,0xf2]
24 ret <2 x i32> < i32 32, i32 32 >
28 define <2 x i32> @vmov_2xi32b() nounwind {
29 ; CHECK: vmov.i32 d16, #0x2000 @ encoding: [0x10,0x02,0xc2,0xf2]
30 ret <2 x i32> < i32 8192, i32 8192 >
34 define <2 x i32> @vmov_2xi32c() nounwind {
35 ; CHECK: vmov.i32 d16, #0x200000 @ encoding: [0x10,0x04,0xc2,0xf2]
36 ret <2 x i32> < i32 2097152, i32 2097152 >
40 define <2 x i32> @vmov_2xi32d() nounwind {
41 ; CHECK: vmov.i32 d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xf2]
42 ret <2 x i32> < i32 536870912, i32 536870912 >
46 define <2 x i32> @vmov_2xi32e() nounwind {
47 ; CHECK: vmov.i32 d16, #0x20FF @ encoding: [0x10,0x0c,0xc2,0xf2]
48 ret <2 x i32> < i32 8447, i32 8447 >
52 define <2 x i32> @vmov_2xi32f() nounwind {
53 ; CHECK: vmov.i32 d16, #0x20FFFF @ encoding: [0x10,0x0d,0xc2,0xf2]
54 ret <2 x i32> < i32 2162687, i32 2162687 >
58 define <1 x i64> @vmov_1xi64() nounwind {
59 ; CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xf3]
60 ret <1 x i64> < i64 18374687574888349695 >
64 define <16 x i8> @vmov_16xi8() nounwind {
65 ; CHECK: vmov.i8 q8, #0x8 @ encoding: [0x58,0x0e,0xc0,0xf2]
66 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
70 define <8 x i16> @vmov_8xi16a() nounwind {
71 ; CHECK: vmov.i16 q8, #0x10 @ encoding: [0x50,0x08,0xc1,0xf2]
72 ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
76 define <8 x i16> @vmov_8xi16b() nounwind {
77 ; CHECK: vmov.i16 q8, #0x1000 @ encoding: [0x50,0x0a,0xc1,0xf2]
78 ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
82 define <4 x i32> @vmov_4xi32a() nounwind {
83 ; CHECK: vmov.i32 q8, #0x20 @ encoding: [0x50,0x00,0xc2,0xf2]
84 ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
88 define <4 x i32> @vmov_4xi32b() nounwind {
89 ; CHECK: vmov.i32 q8, #0x2000 @ encoding: [0x50,0x02,0xc2,0xf2]
90 ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
94 define <4 x i32> @vmov_4xi32c() nounwind {
95 ; CHECK: vmov.i32 q8, #0x200000 @ encoding: [0x50,0x04,0xc2,0xf2]
96 ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
100 define <4 x i32> @vmov_4xi32d() nounwind {
101 ; CHECK: vmov.i32 q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xf2]
102 ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
106 define <4 x i32> @vmov_4xi32e() nounwind {
107 ; CHECK: vmov.i32 q8, #0x20FF @ encoding: [0x50,0x0c,0xc2,0xf2]
108 ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
112 define <4 x i32> @vmov_4xi32f() nounwind {
113 ; CHECK: vmov.i32 q8, #0x20FFFF @ encoding: [0x50,0x0d,0xc2,0xf2]
114 ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
118 define <2 x i64> @vmov_2xi64() nounwind {
119 ; CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xf3]
120 ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
124 define <4 x i16> @vmvn_4xi16a() nounwind {
125 ; CHECK: vmvn.i16 d16, #0x10 @ encoding: [0x30,0x08,0xc1,0xf2]
126 ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
130 define <4 x i16> @vmvn_4xi16b() nounwind {
131 ; CHECK: vmvn.i16 d16, #0x1000 @ encoding: [0x30,0x0a,0xc1,0xf2]
132 ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
136 define <2 x i32> @vmvn_2xi32a() nounwind {
137 ; CHECK: vmvn.i32 d16, #0x20 @ encoding: [0x30,0x00,0xc2,0xf2]
138 ret <2 x i32> < i32 4294967263, i32 4294967263 >
142 define <2 x i32> @vmvn_2xi32b() nounwind {
143 ; CHECK: vmvn.i32 d16, #0x2000 @ encoding: [0x30,0x02,0xc2,0xf2]
144 ret <2 x i32> < i32 4294959103, i32 4294959103 >
148 define <2 x i32> @vmvn_2xi32c() nounwind {
149 ; CHECK: vmvn.i32 d16, #0x200000 @ encoding: [0x30,0x04,0xc2,0xf2]
150 ret <2 x i32> < i32 4292870143, i32 4292870143 >
154 define <2 x i32> @vmvn_2xi32d() nounwind {
155 ; CHECK: vmvn.i32 d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xf2]
156 ret <2 x i32> < i32 3758096383, i32 3758096383 >
160 define <2 x i32> @vmvn_2xi32e() nounwind {
161 ; CHECK: vmvn.i32 d16, #0x20FF @ encoding: [0x30,0x0c,0xc2,0xf2]
162 ret <2 x i32> < i32 4294958848, i32 4294958848 >
166 define <2 x i32> @vmvn_2xi32f() nounwind {
167 ; CHECK: vmvn.i32 d16, #0x20FFFF @ encoding: [0x30,0x0d,0xc2,0xf2]
168 ret <2 x i32> < i32 4292804608, i32 4292804608 >
171 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
172 %tmp1 = load <8 x i8>* %A
173 ; CHECK: vmovl.s8 q8, d16 @ encoding: [0x30,0x0a,0xc8,0xf2]
174 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
178 define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
179 %tmp1 = load <4 x i16>* %A
180 ; CHECK: vmovl.s16 q8, d16 @ encoding: [0x30,0x0a,0xd0,0xf2]
181 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
185 define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
186 %tmp1 = load <2 x i32>* %A
187 ; CHECK: vmovl.s32 q8, d16 @ encoding: [0x30,0x0a,0xe0,0xf2]
188 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
192 define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
193 %tmp1 = load <8 x i8>* %A
194 ; CHECK: vmovl.u8 q8, d16 @ encoding: [0x30,0x0a,0xc8,0xf3]
195 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
199 define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
200 %tmp1 = load <4 x i16>* %A
201 ; CHECK: vmovl.u16 q8, d16 @ encoding: [0x30,0x0a,0xd0,0xf3]
202 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
206 define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
207 %tmp1 = load <2 x i32>* %A
208 ; CHECK: vmovl.u32 q8, d16 @ encoding: [0x30,0x0a,0xe0,0xf3]
209 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
213 define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
214 %tmp1 = load <8 x i16>* %A
215 ; CHECK: vmovn.i16 d16, q8 @ encoding: [0x20,0x02,0xf2,0xf3]
216 %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
220 define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
221 %tmp1 = load <4 x i32>* %A
222 ; CHECK: vmovn.i32 d16, q8 @ encoding: [0x20,0x02,0xf6,0xf3]
223 %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
227 define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
228 %tmp1 = load <2 x i64>* %A
229 ; CHECK: vmovn.i64 d16, q8 @ encoding: [0x20,0x02,0xfa,0xf3]
230 %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
234 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
235 %tmp1 = load <8 x i16>* %A
236 ; CHECK: vqmovn.s16 d16, q8 @ encoding: [0xa0,0x02,0xf2,0xf3]
237 %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
241 define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
242 %tmp1 = load <4 x i32>* %A
243 ; CHECK: vqmovn.s32 d16, q8 @ encoding: [0xa0,0x02,0xf6,0xf3]
244 %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
248 define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
249 %tmp1 = load <2 x i64>* %A
250 ; CHECK: vqmovn.s64 d16, q8 @ encoding: [0xa0,0x02,0xfa,0xf3]
251 %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
255 define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
256 %tmp1 = load <8 x i16>* %A
257 ; CHECK: vqmovn.u16 d16, q8 @ encoding: [0xe0,0x02,0xf2,0xf3]
258 %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
262 define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
263 %tmp1 = load <4 x i32>* %A
264 ; CHECK: vqmovn.u32 d16, q8 @ encoding: [0xe0,0x02,0xf6,0xf3]
265 %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
269 define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
270 %tmp1 = load <2 x i64>* %A
271 ; CHECK: vqmovn.u64 d16, q8 @ encoding: [0xe0,0x02,0xfa,0xf3]
272 %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
276 define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
277 %tmp1 = load <8 x i16>* %A
278 ; CHECK: vqmovun.s16 d16, q8 @ encoding: [0x60,0x02,0xf2,0xf3]
279 %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
283 define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
284 %tmp1 = load <4 x i32>* %A
285 ; CHECK: vqmovun.s32 d16, q8 @ encoding: [0x60,0x02,0xf6,0xf3]
286 %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
290 define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
291 %tmp1 = load <2 x i64>* %A
292 ; CHECK: vqmovun.s64 d16, q8 @ encoding: [0x60,0x02,0xfa,0xf3]
293 %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
297 declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
298 declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
299 declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
301 declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
302 declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
303 declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
305 declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
306 declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
307 declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
309 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
310 %tmp1 = load <8 x i8>* %A
311 ; CHECK: vmov.s8 r0, d16[1] @ encoding: [0xb0,0x0b,0x50,0xee]
312 %tmp2 = extractelement <8 x i8> %tmp1, i32 1
313 %tmp3 = sext i8 %tmp2 to i32
317 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
318 %tmp1 = load <4 x i16>* %A
319 ; CHECK: vmov.s16 r0, d16[1] @ encoding: [0xf0,0x0b,0x10,0xee]
320 %tmp2 = extractelement <4 x i16> %tmp1, i32 1
321 %tmp3 = sext i16 %tmp2 to i32
325 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
326 %tmp1 = load <8 x i8>* %A
327 ; CHECK: vmov.u8 r0, d16[1] @ encoding: [0xb0,0x0b,0xd0,0xee]
328 %tmp2 = extractelement <8 x i8> %tmp1, i32 1
329 %tmp3 = zext i8 %tmp2 to i32
333 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
334 %tmp1 = load <4 x i16>* %A
335 ; CHECK: vmov.u16 r0, d16[1] @ encoding: [0xf0,0x0b,0x90,0xee]
336 %tmp2 = extractelement <4 x i16> %tmp1, i32 1
337 %tmp3 = zext i16 %tmp2 to i32
341 ; Do a vector add to keep the extraction from being done directly from memory.
342 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
343 %tmp1 = load <2 x i32>* %A
344 %tmp2 = add <2 x i32> %tmp1, %tmp1
345 ; CHECK: vmov.32 r0, d16[1] @ encoding: [0x90,0x0b,0x30,0xee]
346 %tmp3 = extractelement <2 x i32> %tmp2, i32 1
350 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
351 %tmp1 = load <16 x i8>* %A
352 ; CHECK: vmov.s8 r0, d16[1] @ encoding: [0xb0,0x0b,0x50,0xee]
353 %tmp2 = extractelement <16 x i8> %tmp1, i32 1
354 %tmp3 = sext i8 %tmp2 to i32
358 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
359 %tmp1 = load <8 x i16>* %A
360 ; CHECK: vmov.s16 r0, d16[1] @ encoding: [0xf0,0x0b,0x10,0xee]
361 %tmp2 = extractelement <8 x i16> %tmp1, i32 1
362 %tmp3 = sext i16 %tmp2 to i32
366 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
367 %tmp1 = load <16 x i8>* %A
368 ; CHECK: vmov.u8 r0, d16[1] @ encoding: [0xb0,0x0b,0xd0,0xee]
369 %tmp2 = extractelement <16 x i8> %tmp1, i32 1
370 %tmp3 = zext i8 %tmp2 to i32
374 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
375 %tmp1 = load <8 x i16>* %A
376 ; CHECK: vmov.u16 r0, d16[1] @ encoding: [0xf0,0x0b,0x90,0xee]
377 %tmp2 = extractelement <8 x i16> %tmp1, i32 1
378 %tmp3 = zext i16 %tmp2 to i32
382 ; Do a vector add to keep the extraction from being done directly from memory.
383 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
384 %tmp1 = load <4 x i32>* %A
385 %tmp2 = add <4 x i32> %tmp1, %tmp1
386 ; CHECK: vmov.32 r0, d16[1] @ encoding: [0x90,0x0b,0x30,0xee]
387 %tmp3 = extractelement <4 x i32> %tmp2, i32 1
391 define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
392 %tmp1 = load <8 x i8>* %A
393 ; CHECK: vmov.8 d16[1], r1 @ encoding: [0xb0,0x1b,0x40,0xee]
394 %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
398 define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
399 %tmp1 = load <4 x i16>* %A
400 ; CHECK: vmov.16 d16[1], r1 @ encoding: [0xf0,0x1b,0x00,0xee]
401 %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
405 define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
406 %tmp1 = load <2 x i32>* %A
407 ; CHECK: vmov.32 d16[1], r1 @ encoding: [0x90,0x1b,0x20,0xee]
408 %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
412 define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
413 %tmp1 = load <16 x i8>* %A
414 ; CHECK: vmov.8 d18[1], r1 @ encoding: [0xb0,0x1b,0x42,0xee]
415 %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
419 define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
420 %tmp1 = load <8 x i16>* %A
421 ; CHECK: vmov.16 d18[1], r1 @ encoding: [0xf0,0x1b,0x02,0xee]
422 %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
426 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
427 %tmp1 = load <4 x i32>* %A
428 ; CHECK: vmov.32 d18[1], r1 @ encoding: [0x90,0x1b,0x22,0xee]
429 %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1