; Checks that NEON "by element" (lane-indexed) instructions are selected for
; AArch64. -fp-contract=fast is passed so that the separate fmul + fadd/fsub
; sequences in the vfma/vfms tests may be fused into fmla/fmls.
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Floating-point multiply-extended.
declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)

; Saturating rounding doubling multiply, high half.
declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)

; Saturating doubling multiply, high half.
declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)

; Saturating doubling multiply long, plus saturating add/sub.
declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)

; Widening multiplies: vmullu is the unsigned form, vmulls the signed form.
declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
; a + b * splat(v[1]) on <4 x i16> should select the by-element MLA (.4h).
define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}
; a + b * splat(v[1]) on <8 x i16>, lane taken from a 64-bit vector: MLA (.8h).
define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}
; a + b * splat(v[1]) on <2 x i32> should select the by-element MLA (.2s).
define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}
; a + b * splat(v[1]) on <4 x i32>, lane taken from a 64-bit vector: MLA (.4s).
define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}
; a + b * splat(v[1]) on <4 x i16>, lane taken from a 128-bit vector: MLA (.4h).
define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}
; a + b * splat(v[1]) on <8 x i16>, lane taken from a 128-bit vector: MLA (.8h).
define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}
; a + b * splat(v[1]) on <2 x i32>, lane taken from a 128-bit vector: MLA (.2s).
define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}
; a + b * splat(v[1]) on <4 x i32>, lane taken from a 128-bit vector: MLA (.4s).
define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}
; a - b * splat(v[1]) on <4 x i16> should select the by-element MLS (.4h).
define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}
; a - b * splat(v[1]) on <8 x i16>, lane taken from a 64-bit vector: MLS (.8h).
define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}
; a - b * splat(v[1]) on <2 x i32> should select the by-element MLS (.2s).
define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}
; a - b * splat(v[1]) on <4 x i32>, lane taken from a 64-bit vector: MLS (.4s).
define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}
; a - b * splat(v[1]) on <4 x i16>, lane taken from a 128-bit vector: MLS (.4h).
define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}
; a - b * splat(v[1]) on <8 x i16>, lane taken from a 128-bit vector: MLS (.8h).
define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}
; a - b * splat(v[1]) on <2 x i32>, lane taken from a 128-bit vector: MLS (.2s).
define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}
; a - b * splat(v[1]) on <4 x i32>, lane taken from a 128-bit vector: MLS (.4s).
define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}
; a * splat(v[1]) on <4 x i16> should select the by-element MUL (.4h).
define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}
; a * splat(v[1]) on <8 x i16>, lane taken from a 64-bit vector: MUL (.8h).
define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}
; a * splat(v[1]) on <2 x i32> should select the by-element MUL (.2s).
define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}
; a * splat(v[1]) on <4 x i32>, lane taken from a 64-bit vector: MUL (.4s).
define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}
; Unsigned-named variant; the IR is the same multiply as the s16 test: MUL (.4h).
define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}
; Unsigned-named variant; the IR is the same multiply as the s16 test: MUL (.8h).
define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}
; Unsigned-named variant; the IR is the same multiply as the s32 test: MUL (.2s).
define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}
; Unsigned-named variant; the IR is the same multiply as the s32 test: MUL (.4s).
define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}
; a * splat(v[1]) on <4 x i16>, lane taken from a 128-bit vector: MUL (.4h).
define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}
; a * splat(v[1]) on <8 x i16>, lane taken from a 128-bit vector: MUL (.8h).
define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}
; a * splat(v[1]) on <2 x i32>, lane taken from a 128-bit vector: MUL (.2s).
define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}
; a * splat(v[1]) on <4 x i32>, lane taken from a 128-bit vector: MUL (.4s).
define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}
; Unsigned-named laneq variant; same IR multiply as the s16 test: MUL (.4h).
define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}
; Unsigned-named laneq variant; same IR multiply as the s16 test: MUL (.8h).
define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}
; Unsigned-named laneq variant; same IR multiply as the s32 test: MUL (.2s).
define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}
; Unsigned-named laneq variant; same IR multiply as the s32 test: MUL (.4s).
define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}
; a + b * splat(v[1]) on <2 x float>: the fmul/fadd pair should become FMLA (.2s).
define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}
; a + b * splat(v[1]) on <4 x float>, lane from a 64-bit vector: FMLA (.4s).
define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}
; a + b * splat(v[1]) on <2 x float>, lane from a 128-bit vector: FMLA (.2s).
define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}
; a + b * splat(v[1]) on <4 x float>, lane from a 128-bit vector: FMLA (.4s).
define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}
; a - b * splat(v[1]) on <2 x float>: the fmul/fsub pair should become FMLS (.2s).
define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}
; a - b * splat(v[1]) on <4 x float>, lane from a 64-bit vector: FMLS (.4s).
define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}
; a - b * splat(v[1]) on <2 x float>, lane from a 128-bit vector: FMLS (.2s).
define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}
; a - b * splat(v[1]) on <4 x float>, lane from a 128-bit vector: FMLS (.4s).
define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}
; a + b * splat(v[0]) with v a <1 x double>: should select by-element FMLA (.2d).
define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %b
  %add = fadd <2 x double> %mul, %a
  ret <2 x double> %add
}
; a + b * splat(v[0]) with v a <2 x double> (lane 0 case): FMLA (.2d).
define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %b
  %add = fadd <2 x double> %mul, %a
  ret <2 x double> %add
}
; a + b * splat(v[1]) with v a <2 x double> (lane 1 case): FMLA (.2d).
define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x double> %shuffle, %b
  %add = fadd <2 x double> %mul, %a
  ret <2 x double> %add
}
; a - b * splat(v[0]) with v a <1 x double>: should select by-element FMLS (.2d).
define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %b
  %sub = fsub <2 x double> %a, %mul
  ret <2 x double> %sub
}
; a - b * splat(v[0]) with v a <2 x double> (lane 0 case): FMLS (.2d).
define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %b
  %sub = fsub <2 x double> %a, %mul
  ret <2 x double> %sub
}
; a - b * splat(v[1]) with v a <2 x double> (lane 1 case): FMLS (.2d).
define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x double> %shuffle, %b
  %sub = fsub <2 x double> %a, %mul
  ret <2 x double> %sub
}
; a + vmulls(b, splat(v[1])): signed widening multiply-accumulate, by-element SMLAL.
define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmulls(b, splat(v[1])) on 32-bit elements: by-element SMLAL (.2d).
define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmulls(b, splat(v[1])), lane from a 128-bit vector: by-element SMLAL (.4s).
define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmulls(b, splat(v[1])), lane from a 128-bit vector: by-element SMLAL (.2d).
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmulls(high half of b (lanes 4-7), splat(v[1])): by-element SMLAL2 (.4s).
define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmulls(high half of b (lanes 2-3), splat(v[1])): by-element SMLAL2 (.2d).
define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmulls(high half of b, splat(v[1])), lane from a 128-bit vector: SMLAL2 (.4s).
define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmulls(high half of b, splat(v[1])), lane from a 128-bit vector: SMLAL2 (.2d).
define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a - vmulls(b, splat(v[1])): signed widening multiply-subtract, by-element SMLSL.
define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmulls(b, splat(v[1])) on 32-bit elements: by-element SMLSL (.2d).
define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a - vmulls(b, splat(v[1])), lane from a 128-bit vector: by-element SMLSL (.4s).
define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmulls(b, splat(v[1])), lane from a 128-bit vector: by-element SMLSL (.2d).
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a - vmulls(high half of b (lanes 4-7), splat(v[1])): by-element SMLSL2 (.4s).
define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmulls(high half of b (lanes 2-3), splat(v[1])): by-element SMLSL2 (.2d).
define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a - vmulls(high half of b, splat(v[1])), lane from a 128-bit vector: SMLSL2 (.4s).
define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmulls(high half of b, splat(v[1])), lane from a 128-bit vector: SMLSL2 (.2d).
define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a + vmullu(b, splat(v[1])): unsigned widening multiply-accumulate, by-element UMLAL.
define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmullu(b, splat(v[1])) on 32-bit elements: by-element UMLAL (.2d).
define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmullu(b, splat(v[1])), lane from a 128-bit vector: by-element UMLAL (.4s).
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmullu(b, splat(v[1])), lane from a 128-bit vector: by-element UMLAL (.2d).
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmullu(high half of b (lanes 4-7), splat(v[1])): by-element UMLAL2 (.4s).
define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmullu(high half of b (lanes 2-3), splat(v[1])): by-element UMLAL2 (.2d).
define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a + vmullu(high half of b, splat(v[1])), lane from a 128-bit vector: UMLAL2 (.4s).
define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
; a + vmullu(high half of b, splat(v[1])), lane from a 128-bit vector: UMLAL2 (.2d).
define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
; a - vmullu(b, splat(v[1])): unsigned widening multiply-subtract, by-element UMLSL.
define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmullu(b, splat(v[1])) on 32-bit elements: by-element UMLSL (.2d).
define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a - vmullu(b, splat(v[1])), lane from a 128-bit vector: by-element UMLSL (.4s).
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; a - vmullu(b, splat(v[1])), lane from a 128-bit vector: by-element UMLSL (.2d).
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
; a - vmullu(high half of b (lanes 4-7), splat(v[1])): by-element UMLSL2 (.4s).
define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
; Unsigned widening multiply-subtract by element on the upper half of %b (lanes 2-3):
; %a - vmullu(high(%b), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
792 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
793 ; CHECK-LABEL: test_vmlsl_high_lane_u32:
794 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
796 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
797 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
798 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
799 %sub = sub <2 x i64> %a, %vmull2.i
; Unsigned widening multiply-subtract by element on the upper half of %b, lane from a 128-bit %v:
; %a - vmullu(high(%b), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
803 define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
804 ; CHECK-LABEL: test_vmlsl_high_laneq_u16:
805 ; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
807 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
808 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
809 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
810 %sub = sub <4 x i32> %a, %vmull2.i
; Unsigned widening multiply-subtract by element on the upper half of %b, lane from a 128-bit %v:
; %a - vmullu(high(%b), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
814 define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
815 ; CHECK-LABEL: test_vmlsl_high_laneq_u32:
816 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
818 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
819 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
820 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
821 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply by element: vmulls(%a, splat(%v[1])).
; The expectation pins the signed mnemonic and lane 1 so an unsigned or wrong-lane selection cannot pass.
825 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
826 ; CHECK-LABEL: test_vmull_lane_s16:
827 ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
829 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
830 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
831 ret <4 x i32> %vmull2.i
; Signed widening multiply by element: vmulls(%a, splat(%v[1])).
; The expectation pins the signed mnemonic and lane 1 so an unsigned or wrong-lane selection cannot pass.
834 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
835 ; CHECK-LABEL: test_vmull_lane_s32:
836 ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
838 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
839 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
840 ret <2 x i64> %vmull2.i
; Unsigned widening multiply by element: vmullu(%a, splat(%v[1])).
; The expectation pins the unsigned mnemonic and lane 1 so a signed or wrong-lane selection cannot pass.
843 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
844 ; CHECK-LABEL: test_vmull_lane_u16:
845 ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
847 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
848 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
849 ret <4 x i32> %vmull2.i
; Unsigned widening multiply by element: vmullu(%a, splat(%v[1])).
; The expectation pins the unsigned mnemonic and lane 1 so a signed or wrong-lane selection cannot pass.
852 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
853 ; CHECK-LABEL: test_vmull_lane_u32:
854 ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
856 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
857 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
858 ret <2 x i64> %vmull2.i
; Signed widening multiply by element on the upper half of %a (lanes 4-7):
; vmulls(high(%a), splat(%v[1])). The expectation pins the signed "2" form and lane 1.
861 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
862 ; CHECK-LABEL: test_vmull_high_lane_s16:
863 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
865 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
866 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
867 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
868 ret <4 x i32> %vmull2.i
; Signed widening multiply by element on the upper half of %a (lanes 2-3):
; vmulls(high(%a), splat(%v[1])). The expectation pins the signed "2" form and lane 1.
871 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
872 ; CHECK-LABEL: test_vmull_high_lane_s32:
873 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
875 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
876 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
877 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
878 ret <2 x i64> %vmull2.i
; Unsigned widening multiply by element on the upper half of %a (lanes 4-7):
; vmullu(high(%a), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
881 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
882 ; CHECK-LABEL: test_vmull_high_lane_u16:
883 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
885 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
886 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
887 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
888 ret <4 x i32> %vmull2.i
; Unsigned widening multiply by element on the upper half of %a (lanes 2-3):
; vmullu(high(%a), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
891 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
892 ; CHECK-LABEL: test_vmull_high_lane_u32:
893 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
895 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
896 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
897 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
898 ret <2 x i64> %vmull2.i
; Signed widening multiply by element, lane taken from a 128-bit vector: vmulls(%a, splat(%v[1])).
; The expectation pins the signed mnemonic and lane 1 so an unsigned or wrong-lane selection cannot pass.
901 define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
902 ; CHECK-LABEL: test_vmull_laneq_s16:
903 ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
905 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
906 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
907 ret <4 x i32> %vmull2.i
; Signed widening multiply by element, lane taken from a 128-bit vector: vmulls(%a, splat(%v[1])).
; The expectation pins the signed mnemonic and lane 1 so an unsigned or wrong-lane selection cannot pass.
910 define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
911 ; CHECK-LABEL: test_vmull_laneq_s32:
912 ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
914 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
915 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
916 ret <2 x i64> %vmull2.i
; Unsigned widening multiply by element, lane taken from a 128-bit vector: vmullu(%a, splat(%v[1])).
; The expectation pins the unsigned mnemonic and lane 1 so a signed or wrong-lane selection cannot pass.
919 define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
920 ; CHECK-LABEL: test_vmull_laneq_u16:
921 ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
923 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
924 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
925 ret <4 x i32> %vmull2.i
; Unsigned widening multiply by element, lane taken from a 128-bit vector: vmullu(%a, splat(%v[1])).
; The expectation pins the unsigned mnemonic and lane 1 so a signed or wrong-lane selection cannot pass.
928 define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
929 ; CHECK-LABEL: test_vmull_laneq_u32:
930 ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
932 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
933 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
934 ret <2 x i64> %vmull2.i
; Signed widening multiply by element on the upper half of %a, lane from a 128-bit %v:
; vmulls(high(%a), splat(%v[1])). The expectation pins the signed "2" form and lane 1.
937 define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
938 ; CHECK-LABEL: test_vmull_high_laneq_s16:
939 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
941 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
942 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
943 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
944 ret <4 x i32> %vmull2.i
; Signed widening multiply by element on the upper half of %a, lane from a 128-bit %v:
; vmulls(high(%a), splat(%v[1])). The expectation pins the signed "2" form and lane 1.
947 define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
948 ; CHECK-LABEL: test_vmull_high_laneq_s32:
949 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
951 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
952 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
953 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
954 ret <2 x i64> %vmull2.i
; Unsigned widening multiply by element on the upper half of %a, lane from a 128-bit %v:
; vmullu(high(%a), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
957 define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
958 ; CHECK-LABEL: test_vmull_high_laneq_u16:
959 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
961 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
962 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
963 %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
964 ret <4 x i32> %vmull2.i
; Unsigned widening multiply by element on the upper half of %a, lane from a 128-bit %v:
; vmullu(high(%a), splat(%v[1])). The expectation pins the unsigned "2" form and lane 1.
967 define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
968 ; CHECK-LABEL: test_vmull_high_laneq_u32:
969 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
971 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
972 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
973 %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
974 ret <2 x i64> %vmull2.i
; Saturating doubling multiply-accumulate long by element:
; vqadds(%a, vqdmull(%b, splat(%v[1]))). The expectation pins the full SQDMLAL mnemonic and lane 1.
977 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
978 ; CHECK-LABEL: test_vqdmlal_lane_s16:
979 ; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
981 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
982 %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
983 %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) #2
984 ret <4 x i32> %vqdmlal4.i
; Saturating doubling multiply-accumulate long by element:
; vqadds(%a, vqdmull(%b, splat(%v[1]))). The expectation pins the full SQDMLAL mnemonic and lane 1.
987 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
988 ; CHECK-LABEL: test_vqdmlal_lane_s32:
989 ; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
991 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
992 %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
993 %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) #2
994 ret <2 x i64> %vqdmlal4.i
; Saturating doubling multiply-accumulate long by element on the upper half of %b (lanes 4-7):
; vqadds(%a, vqdmull(high(%b), splat(%v[1]))). The expectation pins the full SQDMLAL2 mnemonic and lane 1.
997 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
998 ; CHECK-LABEL: test_vqdmlal_high_lane_s16:
999 ; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1001 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1002 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1003 %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
1004 %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) #2
1005 ret <4 x i32> %vqdmlal4.i
; Saturating doubling multiply-accumulate long by element on the upper half of %b (lanes 2-3):
; vqadds(%a, vqdmull(high(%b), splat(%v[1]))). The expectation pins the full SQDMLAL2 mnemonic and lane 1.
1008 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1009 ; CHECK-LABEL: test_vqdmlal_high_lane_s32:
1010 ; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1012 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1013 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1014 %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
1015 %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) #2
1016 ret <2 x i64> %vqdmlal4.i
; Saturating doubling multiply-subtract long by element:
; vqsubs(%a, vqdmull(%b, splat(%v[1]))). The expectation pins the full SQDMLSL mnemonic and lane 1.
1019 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1020 ; CHECK-LABEL: test_vqdmlsl_lane_s16:
1021 ; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
1023 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1024 %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) #2
1025 %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) #2
1026 ret <4 x i32> %vqdmlsl4.i
; Saturating doubling multiply-subtract long by element:
; vqsubs(%a, vqdmull(%b, splat(%v[1]))). The expectation pins the full SQDMLSL mnemonic and lane 1.
1029 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1030 ; CHECK-LABEL: test_vqdmlsl_lane_s32:
1031 ; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1033 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1034 %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) #2
1035 %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) #2
1036 ret <2 x i64> %vqdmlsl4.i
; Saturating doubling multiply-subtract long by element on the upper half of %b (lanes 4-7):
; vqsubs(%a, vqdmull(high(%b), splat(%v[1]))). The expectation pins the full SQDMLSL2 mnemonic and lane 1.
1039 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1040 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
1041 ; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1043 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1044 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1045 %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
1046 %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) #2
1047 ret <4 x i32> %vqdmlsl4.i
; Saturating doubling multiply-subtract long by element on the upper half of %b (lanes 2-3):
; vqsubs(%a, vqdmull(high(%b), splat(%v[1]))). The expectation pins the full SQDMLSL2 mnemonic and lane 1.
1050 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1051 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
1052 ; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1054 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1055 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1056 %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
1057 %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) #2
1058 ret <2 x i64> %vqdmlsl4.i
; Saturating doubling multiply long by element: vqdmull(%a, splat(%v[1])).
; The expectation pins the full SQDMULL mnemonic and lane 1 instead of a loose substring and any lane.
1061 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1062 ; CHECK-LABEL: test_vqdmull_lane_s16:
1063 ; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
1065 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1066 %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
1067 ret <4 x i32> %vqdmull2.i
; Saturating doubling multiply long by element: vqdmull(%a, splat(%v[1])).
; The expectation pins the full SQDMULL mnemonic and lane 1 instead of a loose substring and any lane.
1070 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1071 ; CHECK-LABEL: test_vqdmull_lane_s32:
1072 ; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1074 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1075 %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
1076 ret <2 x i64> %vqdmull2.i
; Saturating doubling multiply long by element, lane taken from a 128-bit vector: vqdmull(%a, splat(%v[1])).
; The expectation pins the full SQDMULL mnemonic and lane 1 instead of a loose substring and any lane.
1079 define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
1080 ; CHECK-LABEL: test_vqdmull_laneq_s16:
1081 ; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
1083 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1084 %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
1085 ret <4 x i32> %vqdmull2.i
; Saturating doubling multiply long by element, lane taken from a 128-bit vector: vqdmull(%a, splat(%v[1])).
; The expectation pins the full SQDMULL mnemonic and lane 1 instead of a loose substring and any lane.
1088 define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
1089 ; CHECK-LABEL: test_vqdmull_laneq_s32:
1090 ; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1092 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
1093 %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
1094 ret <2 x i64> %vqdmull2.i
; Saturating doubling multiply long by element on the upper half of %a (lanes 4-7):
; vqdmull(high(%a), splat(%v[1])). The expectation pins the full SQDMULL2 mnemonic and lane 1.
1097 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1098 ; CHECK-LABEL: test_vqdmull_high_lane_s16:
1099 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1101 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1102 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1103 %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
1104 ret <4 x i32> %vqdmull2.i
; Saturating doubling multiply long by element on the upper half of %a (lanes 2-3):
; vqdmull(high(%a), splat(%v[1])). The expectation pins the full SQDMULL2 mnemonic and lane 1.
1107 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1108 ; CHECK-LABEL: test_vqdmull_high_lane_s32:
1109 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1111 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1112 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1113 %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
1114 ret <2 x i64> %vqdmull2.i
; Saturating doubling multiply long by element on the upper half of %a, lane from a 128-bit %v:
; vqdmull(high(%a), splat(%v[1])). The expectation pins the full SQDMULL2 mnemonic and lane 1.
1117 define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
1118 ; CHECK-LABEL: test_vqdmull_high_laneq_s16:
1119 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1121 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1122 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1123 %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) #2
1124 ret <4 x i32> %vqdmull2.i
; Saturating doubling multiply long by element on the upper half of %a, lane from a 128-bit %v:
; vqdmull(high(%a), splat(%v[1])). The expectation pins the full SQDMULL2 mnemonic and lane 1.
1127 define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
1128 ; CHECK-LABEL: test_vqdmull_high_laneq_s32:
1129 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1131 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1132 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
1133 %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) #2
1134 ret <2 x i64> %vqdmull2.i
; Saturating doubling multiply returning high half, by element: vqdmulh(%a, splat(%v[1])).
; The expectation pins the full SQDMULH mnemonic and lane 1 instead of a loose substring and any lane.
1137 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1138 ; CHECK-LABEL: test_vqdmulh_lane_s16:
1139 ; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
1141 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1142 %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) #2
1143 ret <4 x i16> %vqdmulh2.i
; Saturating doubling multiply returning high half, by element, 128-bit result:
; vqdmulh(%a, splat(%v[1])) with %v[1] widened to eight lanes. The expectation pins SQDMULH and lane 1.
1146 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1147 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
1148 ; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1150 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1151 %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) #2
1152 ret <8 x i16> %vqdmulh2.i
; Saturating doubling multiply returning high half, by element: vqdmulh(%a, splat(%v[1])).
; The expectation pins the full SQDMULH mnemonic and lane 1 instead of a loose substring and any lane.
1155 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1156 ; CHECK-LABEL: test_vqdmulh_lane_s32:
1157 ; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1159 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1160 %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) #2
1161 ret <2 x i32> %vqdmulh2.i
; Saturating doubling multiply returning high half, by element, 128-bit result:
; vqdmulh(%a, splat(%v[1])) with %v[1] widened to four lanes. The expectation pins SQDMULH and lane 1.
1164 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1165 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
1166 ; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1168 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1169 %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) #2
1170 ret <4 x i32> %vqdmulh2.i
; Saturating rounding doubling multiply returning high half, by element: vqrdmulh(%a, splat(%v[1])).
; The expectation pins the full SQRDMULH mnemonic and lane 1 instead of a loose substring and any lane.
1173 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1174 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
1175 ; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
1177 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1178 %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) #2
1179 ret <4 x i16> %vqrdmulh2.i
; Saturating rounding doubling multiply returning high half, by element, 128-bit result:
; vqrdmulh(%a, splat(%v[1])) with %v[1] widened to eight lanes. The expectation pins SQRDMULH and lane 1.
1182 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1183 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
1184 ; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
1186 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1187 %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) #2
1188 ret <8 x i16> %vqrdmulh2.i
; Saturating rounding doubling multiply returning high half, by element: vqrdmulh(%a, splat(%v[1])).
; The expectation pins the full SQRDMULH mnemonic and lane 1 instead of a loose substring and any lane.
1191 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1192 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
1193 ; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1195 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1196 %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) #2
1197 ret <2 x i32> %vqrdmulh2.i
; Saturating rounding doubling multiply returning high half, by element, 128-bit result:
; vqrdmulh(%a, splat(%v[1])) with %v[1] widened to four lanes. The expectation pins SQRDMULH and lane 1.
1200 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1201 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
1202 ; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1204 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1205 %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) #2
1206 ret <4 x i32> %vqrdmulh2.i
; Floating-point multiply by element: fmul(splat(%v[1]), %a).
; The expectation pins the FP mnemonic and lane 1 so an integer MUL or wrong-lane selection cannot pass.
1209 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
1210 ; CHECK-LABEL: test_vmul_lane_f32:
1211 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1213 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1214 %mul = fmul <2 x float> %shuffle, %a
1215 ret <2 x float> %mul
; Floating-point multiply by element, 128-bit result: fmul(splat(%v[1]), %a) with %v[1] widened to four lanes.
; The expectation pins the FP mnemonic and lane 1 so an integer MUL or wrong-lane selection cannot pass.
1218 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
1219 ; CHECK-LABEL: test_vmulq_lane_f32:
1220 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1222 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1223 %mul = fmul <4 x float> %shuffle, %a
1224 ret <4 x float> %mul
; Floating-point multiply by element with a one-element source vector: fmul(splat(%v[0]), %a).
; The zeroinitializer mask selects lane 0, so the expectation pins the FP mnemonic and lane 0.
1227 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
1228 ; CHECK-LABEL: test_vmulq_lane_f64:
1229 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
1231 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
1232 %mul = fmul <2 x double> %shuffle, %a
1233 ret <2 x double> %mul
; Floating-point multiply by element, lane taken from a 128-bit vector: fmul(splat(%v[1]), %a).
; The expectation pins the FP mnemonic and lane 1 so an integer MUL or wrong-lane selection cannot pass.
1236 define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
1237 ; CHECK-LABEL: test_vmul_laneq_f32:
1238 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1240 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 1, i32 1>
1241 %mul = fmul <2 x float> %shuffle, %a
1242 ret <2 x float> %mul
; Floating-point multiply by element, 128-bit operands: fmul(splat(%v[1]), %a).
; The expectation pins the FP mnemonic and lane 1 so an integer MUL or wrong-lane selection cannot pass.
1245 define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
1246 ; CHECK-LABEL: test_vmulq_laneq_f32:
1247 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1249 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1250 %mul = fmul <4 x float> %shuffle, %a
1251 ret <4 x float> %mul
; Floating-point multiply by element using lane 0 of a 128-bit vector: fmul(splat(%v[0]), %a).
; The zeroinitializer mask selects lane 0, so the expectation pins the FP mnemonic and lane 0.
1254 define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
1255 ; CHECK-LABEL: test_vmulq_laneq_f64_0:
1256 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
1258 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
1259 %mul = fmul <2 x double> %shuffle, %a
1260 ret <2 x double> %mul
; Floating-point multiply by element using lane 1 of a 128-bit vector: fmul(splat(%v[1]), %a).
; The expectation pins the FP mnemonic and lane 1 so an integer MUL or wrong-lane selection cannot pass.
1263 define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
1264 ; CHECK-LABEL: test_vmulq_laneq_f64:
1265 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
1267 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
1268 %mul = fmul <2 x double> %shuffle, %a
1269 ret <2 x double> %mul
; Floating-point multiply-extended by element: vmulx(%a, splat(%v[1])).
; The expectation pins the full FMULX mnemonic and lane 1 instead of a loose substring and any lane.
1272 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
1273 ; CHECK-LABEL: test_vmulx_lane_f32:
1274 ; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1276 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1277 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) #2
1278 ret <2 x float> %vmulx2.i
; Floating-point multiply-extended by element, 128-bit result: vmulx(%a, splat(%v[1])) with %v[1] widened to four lanes.
; The expectation pins the full FMULX mnemonic and lane 1 instead of a loose substring and any lane.
1281 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
1282 ; CHECK-LABEL: test_vmulxq_lane_f32:
1283 ; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1285 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1286 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) #2
1287 ret <4 x float> %vmulx2.i
; Floating-point multiply-extended by element with a one-element source vector: vmulx(%a, splat(%v[0])).
; The zeroinitializer mask selects lane 0, so the expectation pins the full FMULX mnemonic and lane 0.
1290 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
1291 ; CHECK-LABEL: test_vmulxq_lane_f64:
1292 ; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
1294 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
1295 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) #2
1296 ret <2 x double> %vmulx2.i
; Floating-point multiply-extended by element, lane taken from a 128-bit vector: vmulx(%a, splat(%v[1])).
; The expectation pins the full FMULX mnemonic and lane 1 instead of a loose substring and any lane.
1299 define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
1300 ; CHECK-LABEL: test_vmulx_laneq_f32:
1301 ; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
1303 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 1, i32 1>
1304 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) #2
1305 ret <2 x float> %vmulx2.i
; Floating-point multiply-extended by element, 128-bit operands: vmulx(%a, splat(%v[1])).
; The expectation pins the full FMULX mnemonic and lane 1 instead of a loose substring and any lane.
1308 define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
1309 ; CHECK-LABEL: test_vmulxq_laneq_f32:
1310 ; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
1312 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1313 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) #2
1314 ret <4 x float> %vmulx2.i
; Floating-point multiply-extended by element using lane 0 of a 128-bit vector: vmulx(%a, splat(%v[0])).
; The zeroinitializer mask selects lane 0, so the expectation pins the full FMULX mnemonic and lane 0.
1317 define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
1318 ; CHECK-LABEL: test_vmulxq_laneq_f64_0:
1319 ; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
1321 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
1322 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) #2
1323 ret <2 x double> %vmulx2.i
; Floating-point multiply-extended by element using lane 1 of a 128-bit vector: vmulx(%a, splat(%v[1])).
; The expectation pins the full FMULX mnemonic and lane 1 instead of a loose substring and any lane.
1326 define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
1327 ; CHECK-LABEL: test_vmulxq_laneq_f64:
1328 ; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
1330 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
1331 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) #2
1332 ret <2 x double> %vmulx2.i