; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Generic fused multiply-add intrinsics used by the vfma/vfms tests.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

; arm64 NEON intrinsics used by the widening multiply / accumulate tests.
declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; Multiplies lanes 4-7 of %a by a <4 x i16> splat of %b through the
; llvm.arm64.neon.smull intrinsic. The expected assembly is a dup of w0
; followed by an smull2 that consumes the duplicated register.
define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Select the upper four lanes of %a.
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %b into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vmull15.i.i
}

; Multiplies lanes 2-3 of %a by a <2 x i32> splat of %b through the
; llvm.arm64.neon.smull intrinsic. The expected assembly is a dup of w0
; followed by an smull2 that consumes the duplicated register.
define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Select the upper two lanes of %a.
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %b into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vmull9.i.i
}

; Multiplies lanes 4-7 of %a by a <4 x i16> splat of %b through the
; llvm.arm64.neon.umull intrinsic. The expected assembly is a dup of w0
; followed by a umull2 that consumes the duplicated register.
define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Select the upper four lanes of %a.
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %b into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vmull15.i.i
}

; Multiplies lanes 2-3 of %a by a <2 x i32> splat of %b through the
; llvm.arm64.neon.umull intrinsic. The expected assembly is a dup of w0
; followed by a umull2 that consumes the duplicated register.
define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Select the upper two lanes of %a.
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %b into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vmull9.i.i
}

; Feeds lanes 4-7 of %a and a <4 x i16> splat of %b to the
; llvm.arm64.neon.sqdmull intrinsic. The expected assembly is a dup of w0
; followed by an sqdmull2 that consumes the duplicated register.
define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Select the upper four lanes of %a.
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %b into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vqdmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vqdmull15.i.i
}

; Feeds lanes 2-3 of %a and a <2 x i32> splat of %b to the
; llvm.arm64.neon.sqdmull intrinsic. The expected assembly is a dup of w0
; followed by an sqdmull2 that consumes the duplicated register.
define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Select the upper two lanes of %a.
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %b into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vqdmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vqdmull9.i.i
}

; Adds %a to the llvm.arm64.neon.smull product of lanes 4-7 of %b and a
; <4 x i16> splat of %c. The expected assembly is a dup of w0 followed by an
; smlal2 that consumes the duplicated register.
define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ; Accumulate the product into %a.
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

; Adds %a to the llvm.arm64.neon.smull product of lanes 2-3 of %b and a
; <2 x i32> splat of %c. The expected assembly is a dup of w0 followed by an
; smlal2 that consumes the duplicated register.
define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ; Accumulate the product into %a.
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

; Adds %a to the llvm.arm64.neon.umull product of lanes 4-7 of %b and a
; <4 x i16> splat of %c. The expected assembly is a dup of w0 followed by a
; umlal2 that consumes the duplicated register.
define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ; Accumulate the product into %a.
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

; Adds %a to the llvm.arm64.neon.umull product of lanes 2-3 of %b and a
; <2 x i32> splat of %c. The expected assembly is a dup of w0 followed by a
; umlal2 that consumes the duplicated register.
define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ; Accumulate the product into %a.
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

; Passes %a and the llvm.arm64.neon.sqdmull result (lanes 4-7 of %b times a
; <4 x i16> splat of %c) to llvm.arm64.neon.sqadd. The expected assembly is a
; single sqdmlal2 on .8h operands.
define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
  ret <4 x i32> %vqdmlal17.i.i
}

; Passes %a and the llvm.arm64.neon.sqdmull result (lanes 2-3 of %b times a
; <2 x i32> splat of %c) to llvm.arm64.neon.sqadd. The expected assembly is a
; single sqdmlal2 on .4s operands.
define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
  ret <2 x i64> %vqdmlal11.i.i
}

; Subtracts from %a the llvm.arm64.neon.smull product of lanes 4-7 of %b and
; a <4 x i16> splat of %c. The expected assembly is a single smlsl2 on .8h
; operands.
define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ; Subtract the product from %a.
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

; Subtracts from %a the llvm.arm64.neon.smull product of lanes 2-3 of %b and
; a <2 x i32> splat of %c. The expected assembly is a single smlsl2 on .4s
; operands.
define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ; Subtract the product from %a.
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

; Subtracts from %a the llvm.arm64.neon.umull product of lanes 4-7 of %b and
; a <4 x i16> splat of %c. The expected assembly is a single umlsl2 on .8h
; operands.
define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ; Subtract the product from %a.
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

; Subtracts from %a the llvm.arm64.neon.umull product of lanes 2-3 of %b and
; a <2 x i32> splat of %c. The expected assembly is a single umlsl2 on .4s
; operands.
define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ; Subtract the product from %a.
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

; Passes %a and the llvm.arm64.neon.sqdmull result (lanes 4-7 of %b times a
; <4 x i16> splat of %c) to llvm.arm64.neon.sqsub. The expected assembly is a
; single sqdmlsl2 on .8h operands.
define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  ; Select the upper four lanes of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Insert %c into every lane to form the splat operand.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
  ret <4 x i32> %vqdmlsl17.i.i
}

; Passes %a and the llvm.arm64.neon.sqdmull result (lanes 2-3 of %b times a
; <2 x i32> splat of %c) to llvm.arm64.neon.sqsub. The expected assembly is a
; single sqdmlsl2 on .4s operands.
define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  ; Select the upper two lanes of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Insert %c into both lanes to form the splat operand.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
  ret <2 x i64> %vqdmlsl11.i.i
}

; Multiplies a <2 x float> splat of %b by %a. The expected assembly is an
; fmul whose last operand is lane 0 of a vector register.
define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
; CHECK-LABEL: test_vmul_n_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  ; Insert %b into both lanes to form the splat operand.
  %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
  %mul.i = fmul <2 x float> %vecinit1.i, %a
  ret <2 x float> %mul.i
}

; Multiplies a <4 x float> splat of %b by %a. The expected assembly is an
; fmul whose last operand is lane 0 of a vector register.
define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
; CHECK-LABEL: test_vmulq_n_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  ; Insert %b into every lane to form the splat operand.
  %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
  %mul.i = fmul <4 x float> %vecinit3.i, %a
  ret <4 x float> %mul.i
}

; Multiplies a <2 x double> splat of %b by %a. The expected assembly is an
; fmul whose last operand is lane 0 of a vector register.
define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
; CHECK-LABEL: test_vmulq_n_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  ; Insert %b into both lanes to form the splat operand.
  %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
  %mul.i = fmul <2 x double> %vecinit1.i, %a
  ret <2 x double> %mul.i
}

; Calls llvm.fma with %b, a <2 x float> splat of %n, and %a as the addend.
; The expected assembly is an fmla that uses an indexed lane of a vector
; register as its last operand.
define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfma_n_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
; The explicit label keeps unnamed value numbering starting at %0.
entry:
  ; Insert %n into both lanes to form the splat operand.
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
  ; Return the fused multiply-add result.
  ret <2 x float> %0
}

; Calls llvm.fma with %b, a <4 x float> splat of %n, and %a as the addend.
; The expected assembly is an fmla that uses an indexed lane of a vector
; register as its last operand.
define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmaq_n_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
; The explicit label keeps unnamed value numbering starting at %0.
entry:
  ; Insert %n into every lane to form the splat operand.
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
  ; Return the fused multiply-add result.
  ret <4 x float> %0
}

; Calls llvm.fma with (-0.0 - %b), a <2 x float> splat of %n, and %a as the
; addend. The expected assembly is an fmls that uses an indexed lane of a
; vector register as its last operand.
define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfms_n_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
; The explicit label keeps unnamed value numbering starting at %0.
entry:
  ; Insert %n into both lanes to form the splat operand.
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  ; Subtract %b from a vector of -0.0 to produce the multiplicand.
  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
  ; Return the fused multiply-add result.
  ret <2 x float> %1
}

; Calls llvm.fma with (-0.0 - %b), a <4 x float> splat of %n, and %a as the
; addend. The expected assembly is an fmls that uses an indexed lane of a
; vector register as its last operand.
define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmsq_n_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
; The explicit label keeps unnamed value numbering starting at %0.
entry:
  ; Insert %n into every lane to form the splat operand.
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  ; Subtract %b from a vector of -0.0 to produce the multiplicand.
  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
  ; Return the fused multiply-add result.
  ret <4 x float> %1
}