1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
3 declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
5 declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
7 declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
9 declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)
11 declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
13 declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
15 declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
17 declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
19 declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)
21 declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)
23 declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)
25 declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
27 define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
28 ; CHECK-LABEL: test_vbsl_s8:
29 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
31 %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
35 define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
36 ; CHECK-LABEL: test_vbsl_s16:
37 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
39 %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
40 %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8>
44 define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
45 ; CHECK-LABEL: test_vbsl_s32:
46 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
48 %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
49 ret <2 x i32> %vbsl3.i
52 define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
53 ; CHECK-LABEL: test_vbsl_s64:
54 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
56 %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
57 ret <1 x i64> %vbsl3.i
60 define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
61 ; CHECK-LABEL: test_vbsl_u8:
62 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
64 %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
68 define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
69 ; CHECK-LABEL: test_vbsl_u16:
70 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
72 %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
73 ret <4 x i16> %vbsl3.i
76 define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
77 ; CHECK-LABEL: test_vbsl_u32:
78 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
80 %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
81 ret <2 x i32> %vbsl3.i
84 define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
85 ; CHECK-LABEL: test_vbsl_u64:
86 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
88 %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
89 ret <1 x i64> %vbsl3.i
92 define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) {
93 ; CHECK-LABEL: test_vbsl_f32:
94 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
96 %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
97 ret <2 x float> %vbsl3.i
100 define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) {
101 ; CHECK-LABEL: test_vbsl_f64:
102 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
104 %vbsl.i = bitcast <1 x i64> %v1 to <1 x double>
105 %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3)
106 ret <1 x double> %vbsl3.i
109 define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
110 ; CHECK-LABEL: test_vbsl_p8:
111 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
113 %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
117 define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
118 ; CHECK-LABEL: test_vbsl_p16:
119 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
121 %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
122 ret <4 x i16> %vbsl3.i
125 define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
126 ; CHECK-LABEL: test_vbslq_s8:
127 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
129 %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
130 ret <16 x i8> %vbsl.i
133 define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
134 ; CHECK-LABEL: test_vbslq_s16:
135 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
137 %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
138 ret <8 x i16> %vbsl3.i
141 define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
142 ; CHECK-LABEL: test_vbslq_s32:
143 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
145 %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
146 ret <4 x i32> %vbsl3.i
149 define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
150 ; CHECK-LABEL: test_vbslq_s64:
151 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
153 %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
154 ret <2 x i64> %vbsl3.i
157 define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
158 ; CHECK-LABEL: test_vbslq_u8:
159 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
161 %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
162 ret <16 x i8> %vbsl.i
165 define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
166 ; CHECK-LABEL: test_vbslq_u16:
167 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
169 %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
170 ret <8 x i16> %vbsl3.i
173 define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
174 ; CHECK-LABEL: test_vbslq_u32:
175 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
177 %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
178 ret <4 x i32> %vbsl3.i
181 define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
182 ; CHECK-LABEL: test_vbslq_u64:
183 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
185 %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
186 ret <2 x i64> %vbsl3.i
189 define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) {
190 ; CHECK-LABEL: test_vbslq_f32:
191 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
193 %vbsl.i = bitcast <4 x i32> %v1 to <4 x float>
194 %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3)
195 ret <4 x float> %vbsl3.i
198 define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
199 ; CHECK-LABEL: test_vbslq_p8:
200 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
202 %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
203 ret <16 x i8> %vbsl.i
206 define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
207 ; CHECK-LABEL: test_vbslq_p16:
208 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
210 %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
211 ret <8 x i16> %vbsl3.i
214 define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) {
215 ; CHECK-LABEL: test_vbslq_f64:
216 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
218 %vbsl.i = bitcast <2 x i64> %v1 to <2 x double>
219 %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3)
220 ret <2 x double> %vbsl3.i
223 define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) {
224 ; CHECK-LABEL: test_bsl_v2f64:
225 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
226 %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3
230 define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) {
231 ; CHECK-LABEL: test_bsl_v4f32:
232 ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
233 %1 = select <4 x i1> %v1, <4 x float> %v2, <4 x float> %v3