1 ; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
; Signed i8 add-across-vector (saddv on <8 x i8>): must lower to ADDV.8b
; followed by a sign-extending SMOV.b of lane 0, since the result is
; returned signext.
; NOTE(review): the 'ret'/closing-'}' lines of this function are not visible
; in this excerpt (embedded numbering jumps 10 -> 14) -- do not assume they
; are absent from the underlying file.
3 define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
4 ; CHECK-LABEL: test_vaddv_s8:
5 ; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
6 ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
9 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a1)
10 %0 = trunc i32 %vaddv.i to i8
; Signed i16 add-across-vector (saddv on <4 x i16>): ADDV.4h then a
; sign-extending SMOV.h of lane 0 for the signext return.
14 define signext i16 @test_vaddv_s16(<4 x i16> %a1) {
15 ; CHECK-LABEL: test_vaddv_s16:
16 ; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
17 ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
20 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a1)
21 %0 = trunc i32 %vaddv.i to i16
; Signed i32 add-across on a 2-element vector: there is no ADDV.2s form,
; so the expected lowering is a pairwise ADDP.2s plus an FMOV to w0.
25 define i32 @test_vaddv_s32(<2 x i32> %a1) {
26 ; CHECK-LABEL: test_vaddv_s32:
27 ; 2 x i32 is not supported by the ISA, thus, this is a special case
28 ; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
29 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
32 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a1)
; Signed i64 add-across on <2 x i64>: expect pairwise ADDP.2d into a
; d-register, then FMOV to x0.
36 define i64 @test_vaddv_s64(<2 x i64> %a1) {
37 ; CHECK-LABEL: test_vaddv_s64:
38 ; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
39 ; CHECK-NEXT: fmov x0, [[REGNUM]]
42 %vaddv.i = tail call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a1)
; Unsigned i8 add-across (uaddv on <8 x i8>): ADDV.8b then a plain FMOV
; (no UMOV needed -- the FMOV from the s-register already zero-extends).
46 define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) {
47 ; CHECK-LABEL: test_vaddv_u8:
48 ; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
49 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
52 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
53 %0 = trunc i32 %vaddv.i to i8
; Same as test_vaddv_u8, but the result is masked with 0x1ff (wider than
; 8 bits): the mask must still fold away, leaving only ADDV.8b + FMOV.
57 define i32 @test_vaddv_u8_masked(<8 x i8> %a1) {
58 ; CHECK-LABEL: test_vaddv_u8_masked:
59 ; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
60 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
63 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
64 %0 = and i32 %vaddv.i, 511 ; 0x1ff
; Unsigned i16 add-across (uaddv on <4 x i16>): ADDV.4h then FMOV; the
; zeroext return must not require an extra UMOV/AND.
68 define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) {
69 ; CHECK-LABEL: test_vaddv_u16:
70 ; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
71 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
74 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
75 %0 = trunc i32 %vaddv.i to i16
; Same as test_vaddv_u16, but masked with 0x31ffff (wider than 16 bits):
; the mask must still fold away, leaving only ADDV.4h + FMOV.
79 define i32 @test_vaddv_u16_masked(<4 x i16> %a1) {
80 ; CHECK-LABEL: test_vaddv_u16_masked:
81 ; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
82 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
85 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
86 %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff
; Unsigned i32 add-across on a 2-element vector: no ADDV.2s form exists,
; so expect pairwise ADDP.2s plus FMOV (mirrors test_vaddv_s32).
90 define i32 @test_vaddv_u32(<2 x i32> %a1) {
91 ; CHECK-LABEL: test_vaddv_u32:
92 ; 2 x i32 is not supported by the ISA, thus, this is a special case
93 ; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
94 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
97 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a1)
; Float add-across on <2 x float>: a single pairwise FADDP.2s producing s0.
101 define float @test_vaddv_f32(<2 x float> %a1) {
102 ; CHECK-LABEL: test_vaddv_f32:
103 ; CHECK: faddp.2s s0, v0
106 %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1)
; Float add-across on <4 x float>: reduced in two steps -- a vector
; FADDP.4s to pair up lanes, then a scalar FADDP.2s into s0.
110 define float @test_vaddv_v4f32(<4 x float> %a1) {
111 ; CHECK-LABEL: test_vaddv_v4f32:
112 ; CHECK: faddp.4s [[REGNUM:v[0-9]+]], v0, v0
113 ; CHECK: faddp.2s s0, [[REGNUM]]
116 %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1)
; Double add-across on <2 x double>: a single pairwise FADDP.2d into d0.
120 define double @test_vaddv_f64(<2 x double> %a1) {
121 ; CHECK-LABEL: test_vaddv_f64:
122 ; CHECK: faddp.2d d0, v0
125 %vaddv.i = tail call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1)
; Unsigned i64 add-across on <2 x i64>: pairwise ADDP.2d into a
; d-register, then FMOV to x0 (mirrors test_vaddv_s64).
129 define i64 @test_vaddv_u64(<2 x i64> %a1) {
130 ; CHECK-LABEL: test_vaddv_u64:
131 ; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
132 ; CHECK-NEXT: fmov x0, [[REGNUM]]
135 %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
; As test_vaddv_u64, but the scalar result is re-inserted into a <1 x i64>
; vector: the reduction must stay in d0 with no round-trip through a GPR.
139 define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
140 ; CHECK-LABEL: test_vaddv_u64_to_vec:
141 ; CHECK: addp.2d d0, v0
146 %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
147 %vec = insertelement <1 x i64> undef, i64 %vaddv.i, i32 0
; 128-bit variant of test_vaddv_s8: saddv on <16 x i8> lowers to ADDV.16b
; plus a sign-extending SMOV.b of lane 0.
151 define signext i8 @test_vaddvq_s8(<16 x i8> %a1) {
152 ; CHECK-LABEL: test_vaddvq_s8:
153 ; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
154 ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
157 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a1)
158 %0 = trunc i32 %vaddv.i to i8
; 128-bit variant of test_vaddv_s16: saddv on <8 x i16> lowers to ADDV.8h
; plus a sign-extending SMOV.h of lane 0.
162 define signext i16 @test_vaddvq_s16(<8 x i16> %a1) {
163 ; CHECK-LABEL: test_vaddvq_s16:
164 ; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
165 ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
168 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a1)
169 %0 = trunc i32 %vaddv.i to i16
; Signed i32 add-across on <4 x i32>: ADDV.4s exists here (unlike the
; 2-element case), so expect ADDV.4s then FMOV to w0.
173 define i32 @test_vaddvq_s32(<4 x i32> %a1) {
174 ; CHECK-LABEL: test_vaddvq_s32:
175 ; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
176 ; CHECK-NEXT: fmov w0, [[REGNUM]]
179 %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a1)
; 128-bit variant of test_vaddv_u8: uaddv on <16 x i8> lowers to ADDV.16b
; then a zero-extending FMOV (no UMOV/AND needed).
183 define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) {
184 ; CHECK-LABEL: test_vaddvq_u8:
185 ; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
186 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
189 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a1)
190 %0 = trunc i32 %vaddv.i to i8
; 128-bit variant of test_vaddv_u16: uaddv on <8 x i16> lowers to ADDV.8h
; then a zero-extending FMOV.
194 define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) {
195 ; CHECK-LABEL: test_vaddvq_u16:
196 ; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
197 ; CHECK-NEXT: fmov w0, s[[REGNUM]]
200 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a1)
201 %0 = trunc i32 %vaddv.i to i16
; Unsigned i32 add-across on <4 x i32>: ADDV.4s then FMOV; the FMOV
; destination is captured (FMOVRES) rather than pinned to w0 here.
205 define i32 @test_vaddvq_u32(<4 x i32> %a1) {
206 ; CHECK-LABEL: test_vaddvq_u32:
207 ; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
208 ; CHECK-NEXT: fmov [[FMOVRES:w[0-9]+]], [[REGNUM]]
211 %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a1)
; Declarations of the AArch64 NEON add-across-vector intrinsics exercised
; above: uaddv (unsigned), saddv (signed), and faddv (floating-point),
; each specialized per element type and vector width.
215 declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>)
217 declare i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16>)
219 declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
221 declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
223 declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
225 declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
227 declare i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64>)
229 declare i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32>)
231 declare i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16>)
233 declare i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8>)
235 declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>)
237 declare i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64>)
239 declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
241 declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)
243 declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1)
244 declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1)
245 declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1)