1 ; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
3 define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
4 %tmp1 = load <8 x i8>* %A
5 %tmp2 = load <8 x i8>* %B
6 ; CHECK: vqshl.s8 d16, d16, d17 @ encoding: [0xb0,0x04,0x41,0xf2]
7 %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
11 define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 %tmp1 = load <4 x i16>* %A
13 %tmp2 = load <4 x i16>* %B
14 ; CHECK: vqshl.s16 d16, d16, d17 @ encoding: [0xb0,0x04,0x51,0xf2]
15 %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
19 define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
20 %tmp1 = load <2 x i32>* %A
21 %tmp2 = load <2 x i32>* %B
22 ; CHECK: vqshl.s32 d16, d16, d17 @ encoding: [0xb0,0x04,0x61,0xf2]
23 %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
27 define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
28 %tmp1 = load <1 x i64>* %A
29 %tmp2 = load <1 x i64>* %B
30 ; CHECK: vqshl.s64 d16, d16, d17 @ encoding: [0xb0,0x04,0x71,0xf2]
31 %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
35 define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
36 %tmp1 = load <8 x i8>* %A
37 %tmp2 = load <8 x i8>* %B
38 %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
42 define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
43 %tmp1 = load <4 x i16>* %A
44 %tmp2 = load <4 x i16>* %B
45 ; CHECK: vqshl.u8 d16, d16, d17 @ encoding: [0xb0,0x04,0x41,0xf3]
46 %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
50 define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
51 %tmp1 = load <2 x i32>* %A
52 %tmp2 = load <2 x i32>* %B
53 ; CHECK: vqshl.u32 d16, d16, d17 @ encoding: [0xb0,0x04,0x61,0xf3]
54 %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
58 define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
59 %tmp1 = load <1 x i64>* %A
60 %tmp2 = load <1 x i64>* %B
61 ; CHECK: vqshl.u64 d16, d16, d17 @ encoding: [0xb0,0x04,0x71,0xf3]
62 %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
66 define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
67 %tmp1 = load <16 x i8>* %A
68 %tmp2 = load <16 x i8>* %B
69 ; CHECK: vqshl.s8 q8, q8, q9 @ encoding: [0xf0,0x04,0x42,0xf2]
70 %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
74 define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
75 %tmp1 = load <8 x i16>* %A
76 %tmp2 = load <8 x i16>* %B
77 ; CHECK: vqshl.s16 q8, q8, q9 @ encoding: [0xf0,0x04,0x52,0xf2]
78 %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
82 define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
83 %tmp1 = load <4 x i32>* %A
84 %tmp2 = load <4 x i32>* %B
85 %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
89 define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
90 %tmp1 = load <2 x i64>* %A
91 %tmp2 = load <2 x i64>* %B
92 ; CHECK: vqshl.s32 q8, q8, q9 @ encoding: [0xf0,0x04,0x62,0xf2]
93 %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
97 define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
98 %tmp1 = load <16 x i8>* %A
99 %tmp2 = load <16 x i8>* %B
100 ; CHECK: vqshl.u8 q8, q8, q9 @ encoding: [0xf0,0x04,0x42,0xf3]
101 %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
105 define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
106 %tmp1 = load <8 x i16>* %A
107 %tmp2 = load <8 x i16>* %B
108 ; CHECK: vqshl.u16 q8, q8, q9 @ encoding: [0xf0,0x04,0x52,0xf3]
109 %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
113 define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
114 %tmp1 = load <4 x i32>* %A
115 %tmp2 = load <4 x i32>* %B
116 ; CHECK: vqshl.u32 q8, q8, q9 @ encoding: [0xf0,0x04,0x62,0xf3]
117 %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
121 define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
122 %tmp1 = load <2 x i64>* %A
123 %tmp2 = load <2 x i64>* %B
124 ; CHECK: vqshl.u64 q8, q8, q9 @ encoding: [0xf0,0x04,0x72,0xf3]
125 %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
129 define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
130 %tmp1 = load <8 x i8>* %A
131 ; CHECK: vqshl.s8 d16, d16, #7 @ encoding: [0x30,0x07,0xcf,0xf2]
132 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
136 define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
137 %tmp1 = load <4 x i16>* %A
138 ; CHECK: vqshl.s16 d16, d16, #15 @ encoding: [0x30,0x07,0xdf,0xf2]
139 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
143 define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
144 %tmp1 = load <2 x i32>* %A
145 ; CHECK: vqshl.s32 d16, d16, #31 @ encoding: [0x30,0x07,0xff,0xf2]
146 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
150 define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
151 %tmp1 = load <1 x i64>* %A
152 ; CHECK: vqshl.s64 d16, d16, #63 @ encoding: [0xb0,0x07,0xff,0xf2]
153 %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
157 define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
158 %tmp1 = load <8 x i8>* %A
159 ; CHECK: vqshl.u8 d16, d16, #7 @ encoding: [0x30,0x07,0xcf,0xf3]
160 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
164 define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
165 %tmp1 = load <4 x i16>* %A
166 ; CHECK: vqshl.u16 d16, d16, #15 @ encoding: [0x30,0x07,0xdf,0xf3]
167 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
171 define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
172 %tmp1 = load <2 x i32>* %A
173 ; CHECK: vqshl.u32 d16, d16, #31 @ encoding: [0x30,0x07,0xff,0xf3]
174 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
178 define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
179 %tmp1 = load <1 x i64>* %A
180 ; CHECK: vqshl.u64 d16, d16, #63 @ encoding: [0xb0,0x07,0xff,0xf3]
181 %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
185 define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
186 %tmp1 = load <8 x i8>* %A
187 ; CHECK: vqshlu.s8 d16, d16, #7 @ encoding: [0x30,0x06,0xcf,0xf3]
188 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
192 define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
193 %tmp1 = load <4 x i16>* %A
194 ; CHECK: vqshlu.s16 d16, d16, #15 @ encoding: [0x30,0x06,0xdf,0xf3]
195 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
199 define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
200 %tmp1 = load <2 x i32>* %A
201 ; CHECK: vqshlu.s32 d16, d16, #31 @ encoding: [0x30,0x06,0xff,0xf3]
202 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
206 define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
207 %tmp1 = load <1 x i64>* %A
208 ; CHECK: vqshlu.s64 d16, d16, #63 @ encoding: [0xb0,0x06,0xff,0xf3]
209 %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
213 define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
214 %tmp1 = load <16 x i8>* %A
215 ; CHECK: vqshl.s8 q8, q8, #7 @ encoding: [0x70,0x07,0xcf,0xf2]
216 %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
220 define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
221 %tmp1 = load <8 x i16>* %A
222 ; CHECK: vqshl.s16 q8, q8, #15 @ encoding: [0x70,0x07,0xdf,0xf2]
223 %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
227 define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
228 %tmp1 = load <4 x i32>* %A
229 ; CHECK: vqshl.s32 q8, q8, #31 @ encoding: [0x70,0x07,0xff,0xf2]
230 %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
234 define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
235 %tmp1 = load <2 x i64>* %A
236 ; CHECK: vqshl.s64 q8, q8, #63 @ encoding: [0xf0,0x07,0xff,0xf2]
237 %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
241 define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
242 %tmp1 = load <16 x i8>* %A
243 ; CHECK: vqshl.u8 q8, q8, #7 @ encoding: [0x70,0x07,0xcf,0xf3]
244 %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
248 define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
249 %tmp1 = load <8 x i16>* %A
250 ; CHECK: vqshl.u16 q8, q8, #15 @ encoding: [0x70,0x07,0xdf,0xf3]
251 %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
255 define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
256 %tmp1 = load <4 x i32>* %A
257 ; CHECK: vqshl.u32 q8, q8, #31 @ encoding: [0x70,0x07,0xff,0xf3]
258 %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
262 define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
263 %tmp1 = load <2 x i64>* %A
264 ; CHECK: vqshl.u64 q8, q8, #63 @ encoding: [0xf0,0x07,0xff,0xf3]
265 %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
269 define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
270 %tmp1 = load <16 x i8>* %A
271 ; CHECK: vqshlu.s8 q8, q8, #7 @ encoding: [0x70,0x06,0xcf,0xf3]
272 %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
276 define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
277 %tmp1 = load <8 x i16>* %A
278 ; CHECK: vqshlu.s16 q8, q8, #15 @ encoding: [0x70,0x06,0xdf,0xf3]
279 %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
283 define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
284 %tmp1 = load <4 x i32>* %A
285 ; CHECK: vqshlu.s32 q8, q8, #31 @ encoding: [0x70,0x06,0xff,0xf3]
286 %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
290 define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
291 %tmp1 = load <2 x i64>* %A
292 ; CHECK: vqshlu.s64 q8, q8, #63 @ encoding: [0xf0,0x06,0xff,0xf3]
293 %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
297 declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
298 declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
299 declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
300 declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
302 declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
303 declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
304 declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
305 declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
307 declare <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
308 declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
309 declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
310 declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
312 declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
313 declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
314 declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
315 declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
317 declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
318 declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
319 declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
320 declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
322 declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
323 declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
324 declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
325 declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
327 define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
328 %tmp1 = load <8 x i8>* %A
329 %tmp2 = load <8 x i8>* %B
330 ; CHECK: vqrshl.s8 d16, d16, d17 @ encoding: [0xb0,0x05,0x41,0xf2]
331 %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
335 define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
336 %tmp1 = load <4 x i16>* %A
337 %tmp2 = load <4 x i16>* %B
338 ; CHECK: vqrshl.s16 d16, d16, d17 @ encoding: [0xb0,0x05,0x51,0xf2]
339 %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
343 define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
344 %tmp1 = load <2 x i32>* %A
345 %tmp2 = load <2 x i32>* %B
346 ; CHECK: vqrshl.s32 d16, d16, d17 @ encoding: [0xb0,0x05,0x61,0xf2]
347 %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
351 define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
352 %tmp1 = load <1 x i64>* %A
353 %tmp2 = load <1 x i64>* %B
354 ; CHECK: vqrshl.s64 d16, d16, d17 @ encoding: [0xb0,0x05,0x71,0xf2]
355 %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
359 define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
360 %tmp1 = load <8 x i8>* %A
361 %tmp2 = load <8 x i8>* %B
362 ; CHECK: vqrshl.u8 d16, d16, d17 @ encoding: [0xb0,0x05,0x41,0xf3
363 %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
367 define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
368 %tmp1 = load <4 x i16>* %A
369 %tmp2 = load <4 x i16>* %B
370 ; CHECK: vqrshl.u16 d16, d16, d17 @ encoding: [0xb0,0x05,0x51,0xf3]
371 %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
375 define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
376 %tmp1 = load <2 x i32>* %A
377 %tmp2 = load <2 x i32>* %B
378 ; CHECK: vqrshl.u32 d16, d16, d17 @ encoding: [0xb0,0x05,0x61,0xf3]
379 %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
383 define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
384 %tmp1 = load <1 x i64>* %A
385 %tmp2 = load <1 x i64>* %B
386 ; CHECK: vqrshl.u64 d16, d16, d17 @ encoding: [0xb0,0x05,0x71,0xf3]
387 %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
391 define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
392 %tmp1 = load <16 x i8>* %A
393 %tmp2 = load <16 x i8>* %B
394 ; CHECK: vqrshl.s8 q8, q8, q9 @ encoding: [0xf0,0x05,0x42,0xf2]
395 %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
399 define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
400 %tmp1 = load <8 x i16>* %A
401 %tmp2 = load <8 x i16>* %B
402 ; CHECK: vqrshl.s16 q8, q8, q9 @ encoding: [0xf0,0x05,0x52,0xf2]
403 %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
407 define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
408 %tmp1 = load <4 x i32>* %A
409 %tmp2 = load <4 x i32>* %B
410 ; CHECK: vqrshl.s32 q8, q8, q9 @ encoding: [0xf0,0x05,0x62,0xf2]
411 %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
415 define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
416 %tmp1 = load <2 x i64>* %A
417 %tmp2 = load <2 x i64>* %B
418 ; CHECK: vqrshl.s64 q8, q8, q9 @ encoding: [0xf0,0x05,0x72,0xf2]
419 %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
423 define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
424 %tmp1 = load <16 x i8>* %A
425 %tmp2 = load <16 x i8>* %B
426 ; CHECK: vqrshl.u8 q8, q8, q9 @ encoding: [0xf0,0x05,0x42,0xf3]
427 %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
431 define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
432 %tmp1 = load <8 x i16>* %A
433 %tmp2 = load <8 x i16>* %B
434 ; CHECK: vqrshl.u16 q8, q8, q9 @ encoding: [0xf0,0x05,0x52,0xf3]
435 %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
439 define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
440 %tmp1 = load <4 x i32>* %A
441 %tmp2 = load <4 x i32>* %B
442 ; CHECK: vqrshl.u32 q8, q8, q9 @ encoding: [0xf0,0x05,0x62,0xf3]
443 %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
447 define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
448 %tmp1 = load <2 x i64>* %A
449 %tmp2 = load <2 x i64>* %B
450 ; CHECK: vqrshl.u64 q8, q8, q9 @ encoding: [0xf0,0x05,0x72,0xf3]
451 %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
455 declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
456 declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
457 declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
458 declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
460 declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
461 declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
462 declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
463 declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
465 declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
466 declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
467 declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
468 declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
470 declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
471 declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
472 declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
473 declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
475 define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
476 %tmp1 = load <8 x i16>* %A
477 ; CHECK: vqshrn.s16 d16, q8, #8 @ encoding: [0x30,0x09,0xc8,0xf2]
478 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
482 define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
483 %tmp1 = load <4 x i32>* %A
484 ; CHECK: vqshrn.s32 d16, q8, #16 @ encoding: [0x30,0x09,0xd0,0xf2]
485 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
489 define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
490 %tmp1 = load <2 x i64>* %A
491 ; CHECK: vqshrn.s64 d16, q8, #32 @ encoding: [0x30,0x09,0xe0,0xf2]
492 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
496 define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
497 %tmp1 = load <8 x i16>* %A
498 ; CHECK: vqshrn.u16 d16, q8, #8 @ encoding: [0x30,0x09,0xc8,0xf3]
499 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
503 define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
504 %tmp1 = load <4 x i32>* %A
505 ; CHECK: vqshrn.u32 d16, q8, #16 @ encoding: [0x30,0x09,0xd0,0xf3]
506 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
510 define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
511 %tmp1 = load <2 x i64>* %A
512 ; CHECK: vqshrn.u64 d16, q8, #32 @ encoding: [0x30,0x09,0xe0,0xf3]
513 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
517 define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
518 %tmp1 = load <8 x i16>* %A
519 ; CHECK: vqshrun.s16 d16, q8, #8 @ encoding: [0x30,0x08,0xc8,0xf3]
520 %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
524 define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
525 %tmp1 = load <4 x i32>* %A
526 ; CHECK: vqshrun.s32 d16, q8, #16 @ encoding: [0x30,0x08,0xd0,0xf3]
527 %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
531 define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
532 %tmp1 = load <2 x i64>* %A
533 ; CHECK: vqshrun.s64 d16, q8, #32 @ encoding: [0x30,0x08,0xe0,0xf3]
534 %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
538 declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
539 declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
540 declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
542 declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
543 declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
544 declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
546 declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
547 declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
548 declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
550 define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
551 %tmp1 = load <8 x i16>* %A
552 ; CHECK: vqrshrn.s16 d16, q8, #8 @ encoding: [0x70,0x09,0xc8,0xf2]
553 %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
557 define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
558 %tmp1 = load <4 x i32>* %A
559 ; CHECK: vqrshrn.s32 d16, q8, #16 @ encoding: [0x70,0x09,0xd0,0xf2]
560 %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
564 define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
565 %tmp1 = load <2 x i64>* %A
566 ; CHECK: vqrshrn.s64 d16, q8, #32 @ encoding: [0x70,0x09,0xe0,0xf2]
567 %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
571 define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
572 %tmp1 = load <8 x i16>* %A
573 ; CHECK: vqrshrn.u16 d16, q8, #8 @ encoding: [0x70,0x09,0xc8,0xf3]
574 %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
578 define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
579 %tmp1 = load <4 x i32>* %A
580 ; CHECK: vqrshrn.u32 d16, q8, #16 @ encoding: [0x70,0x09,0xd0,0xf3]
581 %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
585 define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
586 %tmp1 = load <2 x i64>* %A
587 ; CHECK: vqrshrn.u64 d16, q8, #32 @ encoding: [0x70,0x09,0xe0,0xf3]
588 %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
592 define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
593 %tmp1 = load <8 x i16>* %A
594 ; CHECK: vqrshrun.s16 d16, q8, #8 @ encoding: [0x70,0x08,0xc8,0xf3]
595 %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
599 define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
600 %tmp1 = load <4 x i32>* %A
601 ; CHECK: vqrshrun.s32 d16, q8, #16 @ encoding: [0x70,0x08,0xd0,0xf3]
602 %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
606 define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
607 %tmp1 = load <2 x i64>* %A
608 ; CHECK: vqrshrun.s64 d16, q8, #32 @ encoding: [0x70,0x08,0xe0,0xf3]
609 %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
613 declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
614 declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
615 declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
617 declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
618 declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
619 declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
621 declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
622 declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
623 declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone