1 ; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
3 declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
4 declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
5 declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
8 define <8 x i8> @vpadd_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
9 %tmp1 = load <8 x i8>* %A
10 %tmp2 = load <8 x i8>* %B
11 ; CHECK: vpadd.i8 d16, d17, d16 @ encoding: [0xb0,0x0b,0x41,0xf2]
12 %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
17 define <4 x i16> @vpadd_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
18 %tmp1 = load <4 x i16>* %A
19 %tmp2 = load <4 x i16>* %B
20 ; CHECK: vpadd.i16 d16, d17, d16 @ encoding: [0xb0,0x0b,0x51,0xf2]
21 %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
26 define <2 x i32> @vpadd_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
27 %tmp1 = load <2 x i32>* %A
28 %tmp2 = load <2 x i32>* %B
29 ; CHECK: vpadd.i32 d16, d17, d16 @ encoding: [0xb0,0x0b,0x61,0xf2]
30 %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
34 declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
36 ; CHECK: vpadd_2xfloat
37 define <2 x float> @vpadd_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind {
38 %tmp1 = load <2 x float>* %A
39 %tmp2 = load <2 x float>* %B
40 ; CHECK: vpadd.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x40,0xf3]
41 %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
45 declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
46 declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
47 declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
50 define <4 x i16> @vpaddls_8xi8(<8 x i8>* %A) nounwind {
51 %tmp1 = load <8 x i8>* %A
52 ; CHECK: vpaddl.s8 d16, d16 @ encoding: [0x20,0x02,0xf0,0xf3]
53 %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
57 ; CHECK: vpaddls_4xi16
58 define <2 x i32> @vpaddls_4xi16(<4 x i16>* %A) nounwind {
59 %tmp1 = load <4 x i16>* %A
60 ; CHECK: vpaddl.s16 d16, d16 @ encoding: [0x20,0x02,0xf4,0xf3]
61 %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
65 ; CHECK: vpaddls_2xi32
66 define <1 x i64> @vpaddls_2xi32(<2 x i32>* %A) nounwind {
67 %tmp1 = load <2 x i32>* %A
68 ; CHECK: vpaddl.s32 d16, d16 @ encoding: [0x20,0x02,0xf8,0xf3]
69 %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
73 declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
74 declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
75 declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
78 define <4 x i16> @vpaddlu_8xi8(<8 x i8>* %A) nounwind {
79 %tmp1 = load <8 x i8>* %A
80 ; CHECK: vpaddl.u8 d16, d16 @ encoding: [0xa0,0x02,0xf0,0xf3]
81 %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
85 ; CHECK: vpaddlu_4xi16
86 define <2 x i32> @vpaddlu_4xi16(<4 x i16>* %A) nounwind {
87 %tmp1 = load <4 x i16>* %A
88 ; CHECK: vpaddl.u16 d16, d16 @ encoding: [0xa0,0x02,0xf4,0xf3]
89 %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
93 ; CHECK: vpaddlu_2xi32
94 define <1 x i64> @vpaddlu_2xi32(<2 x i32>* %A) nounwind {
95 %tmp1 = load <2 x i32>* %A
96 ; CHECK: vpaddl.u32 d16, d16 @ encoding: [0xa0,0x02,0xf8,0xf3]
97 %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
101 declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
102 declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
103 declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
105 ; CHECK: vpaddls_16xi8
106 define <8 x i16> @vpaddls_16xi8(<16 x i8>* %A) nounwind {
107 %tmp1 = load <16 x i8>* %A
108 ; CHECK: vpaddl.s8 q8, q8 @ encoding: [0x60,0x02,0xf0,0xf3]
109 %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
113 ; CHECK: vpaddls_8xi16
114 define <4 x i32> @vpaddls_8xi16(<8 x i16>* %A) nounwind {
115 %tmp1 = load <8 x i16>* %A
116 ; CHECK: vpaddl.s16 q8, q8 @ encoding: [0x60,0x02,0xf4,0xf3]
117 %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
121 ; CHECK: vpaddls_4xi32
122 define <2 x i64> @vpaddls_4xi32(<4 x i32>* %A) nounwind {
123 %tmp1 = load <4 x i32>* %A
124 ; CHECK: vpaddl.s32 q8, q8 @ encoding: [0x60,0x02,0xf8,0xf3]
125 %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
129 declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
130 declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
131 declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
133 ; CHECK: vpaddlu_16xi8
134 define <8 x i16> @vpaddlu_16xi8(<16 x i8>* %A) nounwind {
135 %tmp1 = load <16 x i8>* %A
136 ; CHECK: vpaddl.u8 q8, q8 @ encoding: [0xe0,0x02,0xf0,0xf3]
137 %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
141 ; CHECK: vpaddlu_8xi16
142 define <4 x i32> @vpaddlu_8xi16(<8 x i16>* %A) nounwind {
143 %tmp1 = load <8 x i16>* %A
144 ; CHECK: vpaddl.u16 q8, q8 @ encoding: [0xe0,0x02,0xf4,0xf3]
145 %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
149 ; CHECK: vpaddlu_4xi32
150 define <2 x i64> @vpaddlu_4xi32(<4 x i32>* %A) nounwind {
151 %tmp1 = load <4 x i32>* %A
152 ; CHECK: vpaddl.u32 q8, q8 @ encoding: [0xe0,0x02,0xf8,0xf3]
153 %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
157 declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone
158 declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) nounwind readnone
159 declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone
161 ; CHECK: vpadals_8xi8
162 define <4 x i16> @vpadals_8xi8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
163 %tmp1 = load <4 x i16>* %A
164 %tmp2 = load <8 x i8>* %B
165 ; CHECK: vpadal.s8 d16, d17 @ encoding: [0x21,0x06,0xf0,0xf3]
166 %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
170 ; CHECK: vpadals_4xi16
171 define <2 x i32> @vpadals_4xi16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
172 %tmp1 = load <2 x i32>* %A
173 %tmp2 = load <4 x i16>* %B
174 ; CHECK: vpadal.s16 d16, d17 @ encoding: [0x21,0x06,0xf4,0xf3]
175 %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
179 ; CHECK: vpadals_2xi32
180 define <1 x i64> @vpadals_2xi32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
181 %tmp1 = load <1 x i64>* %A
182 %tmp2 = load <2 x i32>* %B
183 ; CHECK: vpadal.s32 d16, d17 @ encoding: [0x21,0x06,0xf8,0xf3]
184 %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
188 declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone
189 declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) nounwind readnone
190 declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone
192 ; CHECK: vpadalu_8xi8
193 define <4 x i16> @vpadalu_8xi8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
194 %tmp1 = load <4 x i16>* %A
195 %tmp2 = load <8 x i8>* %B
196 ; CHECK: vpadal.u8 d16, d17 @ encoding: [0xa1,0x06,0xf0,0xf3]
197 %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
201 ; CHECK: vpadalu_4xi16
202 define <2 x i32> @vpadalu_4xi16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
203 %tmp1 = load <2 x i32>* %A
204 %tmp2 = load <4 x i16>* %B
205 ; CHECK: vpadal.u16 d16, d17 @ encoding: [0xa1,0x06,0xf4,0xf3]
206 %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
210 ; CHECK: vpadalu_2xi32
211 define <1 x i64> @vpadalu_2xi32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
212 %tmp1 = load <1 x i64>* %A
213 %tmp2 = load <2 x i32>* %B
214 ; CHECK: vpadal.u32 d16, d17 @ encoding: [0xa1,0x06,0xf8,0xf3]
215 %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
219 declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone
220 declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone
221 declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone
223 ; CHECK: vpadals_16xi8
224 define <8 x i16> @vpadals_16xi8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
225 %tmp1 = load <8 x i16>* %A
226 %tmp2 = load <16 x i8>* %B
227 ; CHECK: vpadal.s8 q9, q8 @ encoding: [0x60,0x26,0xf0,0xf3]
228 %tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
232 ; CHECK: vpadals_8xi16
233 define <4 x i32> @vpadals_8xi16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
234 %tmp1 = load <4 x i32>* %A
235 %tmp2 = load <8 x i16>* %B
236 ; CHECK: vpadal.s16 q9, q8 @ encoding: [0x60,0x26,0xf4,0xf3]
237 %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
241 ; CHECK: vpadals_4xi32
242 define <2 x i64> @vpadals_4xi32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
243 %tmp1 = load <2 x i64>* %A
244 %tmp2 = load <4 x i32>* %B
245 ; CHECK: vpadal.s32 q9, q8 @ encoding: [0x60,0x26,0xf8,0xf3]
246 %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
250 declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone
251 declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone
252 declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone
254 ; CHECK: vpadalu_16xi8
255 define <8 x i16> @vpadalu_16xi8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
256 %tmp1 = load <8 x i16>* %A
257 %tmp2 = load <16 x i8>* %B
258 ; CHECK: vpadal.u8 q9, q8 @ encoding: [0xe0,0x26,0xf0,0xf3]
259 %tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
263 ; CHECK: vpadalu_8xi16
264 define <4 x i32> @vpadalu_8xi16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
265 %tmp1 = load <4 x i32>* %A
266 %tmp2 = load <8 x i16>* %B
267 ; CHECK: vpadal.u16 q9, q8 @ encoding: [0xe0,0x26,0xf4,0xf3]
268 %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
272 ; CHECK: vpadalu_4xi32
273 define <2 x i64> @vpadalu_4xi32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
274 %tmp1 = load <2 x i64>* %A
275 %tmp2 = load <4 x i32>* %B
276 ; CHECK: vpadal.u32 q9, q8 @ encoding: [0xe0,0x26,0xf8,0xf3]
277 %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
281 declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
282 declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
283 declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
286 define <8 x i8> @vpmins_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
287 %tmp1 = load <8 x i8>* %A
288 %tmp2 = load <8 x i8>* %B
289 ; CHECK: vpmin.s8 d16, d16, d17 @ encoding: [0xb1,0x0a,0x40,0xf2]
290 %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
294 ; CHECK: vpmins_4xi16
295 define <4 x i16> @vpmins_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
296 %tmp1 = load <4 x i16>* %A
297 %tmp2 = load <4 x i16>* %B
298 ; CHECK: vpmin.s16 d16, d16, d17 @ encoding: [0xb1,0x0a,0x50,0xf2]
299 %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
303 ; CHECK: vpmins_2xi32
304 define <2 x i32> @vpmins_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
305 %tmp1 = load <2 x i32>* %A
306 %tmp2 = load <2 x i32>* %B
307 ; CHECK: vpmin.s32 d16, d16, d17 @ encoding: [0xb1,0x0a,0x60,0xf2]
308 %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
312 declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
313 declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
314 declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
317 define <8 x i8> @vpminu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
318 %tmp1 = load <8 x i8>* %A
319 %tmp2 = load <8 x i8>* %B
320 ; CHECK: vpmin.u8 d16, d16, d17 @ encoding: [0xb1,0x0a,0x40,0xf3]
321 %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
325 ; CHECK: vpminu_4xi16
326 define <4 x i16> @vpminu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
327 %tmp1 = load <4 x i16>* %A
328 %tmp2 = load <4 x i16>* %B
329 ; CHECK: vpmin.u16 d16, d16, d17 @ encoding: [0xb1,0x0a,0x50,0xf3]
330 %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
334 ; CHECK: vpminu_2xi32
335 define <2 x i32> @vpminu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
336 %tmp1 = load <2 x i32>* %A
337 %tmp2 = load <2 x i32>* %B
338 ; CHECK: vpmin.u32 d16, d16, d17 @ encoding: [0xb1,0x0a,0x60,0xf3]
339 %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
343 declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
345 ; CHECK: vpmin_2xfloat
346 define <2 x float> @vpmin_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind {
347 %tmp1 = load <2 x float>* %A
348 %tmp2 = load <2 x float>* %B
349 ; CHECK: vpmin.f32 d16, d16, d17 @ encoding: [0xa1,0x0f,0x60,0xf3]
350 %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
351 ret <2 x float> %tmp3
354 declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
355 declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
356 declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
359 define <8 x i8> @vpmaxs_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
360 %tmp1 = load <8 x i8>* %A
361 %tmp2 = load <8 x i8>* %B
362 ; CHECK: vpmax.s8 d16, d16, d17 @ encoding: [0xa1,0x0a,0x40,0xf2]
363 %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
367 ; CHECK: vpmaxs_4xi16
368 define <4 x i16> @vpmaxs_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
369 %tmp1 = load <4 x i16>* %A
370 %tmp2 = load <4 x i16>* %B
371 ; CHECK: vpmax.s16 d16, d16, d17 @ encoding: [0xa1,0x0a,0x50,0xf2]
372 %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
376 ; CHECK: vpmaxs_2xi32
377 define <2 x i32> @vpmaxs_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
378 %tmp1 = load <2 x i32>* %A
379 %tmp2 = load <2 x i32>* %B
380 ; CHECK: vpmax.s32 d16, d16, d17 @ encoding: [0xa1,0x0a,0x60,0xf2]
381 %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
385 declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
386 declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
387 declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
390 define <8 x i8> @vpmaxu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
391 %tmp1 = load <8 x i8>* %A
392 %tmp2 = load <8 x i8>* %B
393 ; CHECK: vpmax.u8 d16, d16, d17 @ encoding: [0xa1,0x0a,0x40,0xf3]
394 %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
398 ; CHECK: vpmaxu_4xi16
399 define <4 x i16> @vpmaxu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
400 %tmp1 = load <4 x i16>* %A
401 %tmp2 = load <4 x i16>* %B
402 ; CHECK: vpmax.u16 d16, d16, d17 @ encoding: [0xa1,0x0a,0x50,0xf3]
403 %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
407 ; CHECK: vpmaxu_2xi32
408 define <2 x i32> @vpmaxu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
409 %tmp1 = load <2 x i32>* %A
410 %tmp2 = load <2 x i32>* %B
411 ; CHECK: vpmax.u32 d16, d16, d17 @ encoding: [0xa1,0x0a,0x60,0xf3]
412 %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
416 declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
418 ; CHECK: vpmax_2xfloat
419 define <2 x float> @vpmax_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind {
420 %tmp1 = load <2 x float>* %A
421 %tmp2 = load <2 x float>* %B
422 ; CHECK: vpmax.f32 d16, d16, d17 @ encoding: [0xa1,0x0f,0x40,0xf3]
423 %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
424 ret <2 x float> %tmp3