1 ; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
; Element-wise fadd of two <8 x half> vectors.
3 define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
38 %0 = fadd <8 x half> %a, %b
; Element-wise fsub (%a - %b) of two <8 x half> vectors.
43 define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) {
78 %0 = fsub <8 x half> %a, %b
; Element-wise fmul of two <8 x half> vectors.
83 define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) {
118 %0 = fmul <8 x half> %a, %b
; Element-wise fdiv (%a / %b) of two <8 x half> vectors.
123 define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) {
125 ; CHECK-LABEL: div_h:
158 %0 = fdiv <8 x half> %a, %b
; Loads an <8 x half> vector through pointer %a (declared alignment 4).
; The expected output is a q-register load from the address in x0.
163 define <8 x half> @load_h(<8 x half>* %a) {
165 ; CHECK-LABEL: load_h:
166 ; CHECK: ldr q0, [x0]
167 %0 = load <8 x half>, <8 x half>* %a, align 4
; Stores the <8 x half> vector %b through pointer %a (declared alignment 4).
; The expected output is a q-register store to the address in x0.
172 define void @store_h(<8 x half>* %a, <8 x half> %b) {
174 ; CHECK-LABEL: store_h:
175 ; CHECK: str q0, [x0]
176 store <8 x half> %b, <8 x half>* %a, align 4
; Truncates <8 x float> to <8 x half> with fptrunc.
; Expected output: one fcvtn per 4-lane input register (v0 and v1), in either
; order, followed by an ins that moves the second narrowed result into the
; upper 64 bits of v0. The scratch register is captured with v[0-9]+ so that
; multi-digit register numbers (v10 and above) are matched as well.
180 define <8 x half> @s_to_h(<8 x float> %a) {
181 ; CHECK-LABEL: s_to_h:
182 ; CHECK-DAG: fcvtn v0.4h, v0.4s
183 ; CHECK-DAG: fcvtn [[REG:v[0-9]+]].4h, v1.4s
184 ; CHECK: ins v0.d[1], [[REG]].d[0]
185 %1 = fptrunc <8 x float> %a to <8 x half>
; Truncates <8 x double> to <8 x half> with fptrunc.
; The directives visible here expect four extractions of the upper double lane
; (mov dN, vM.d[1]) and eight half-lane insertions (ins vN.h); all of them are
; order-independent (DAG) matches with wildcard register numbers.
189 define <8 x half> @d_to_h(<8 x double> %a) {
190 ; CHECK-LABEL: d_to_h:
191 ; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
192 ; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
193 ; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
194 ; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
203 ; CHECK-DAG: ins v{{[0-9]+}}.h
204 ; CHECK-DAG: ins v{{[0-9]+}}.h
205 ; CHECK-DAG: ins v{{[0-9]+}}.h
206 ; CHECK-DAG: ins v{{[0-9]+}}.h
207 ; CHECK-DAG: ins v{{[0-9]+}}.h
208 ; CHECK-DAG: ins v{{[0-9]+}}.h
209 ; CHECK-DAG: ins v{{[0-9]+}}.h
210 ; CHECK-DAG: ins v{{[0-9]+}}.h
211 %1 = fptrunc <8 x double> %a to <8 x half>
; Extends <8 x half> to <8 x float> with fpext.
; Expected output, in this order: fcvtl2 of the upper four lanes into v1, then
; fcvtl of the lower four lanes into v0.
215 define <8 x float> @h_to_s(<8 x half> %a) {
216 ; CHECK-LABEL: h_to_s:
217 ; CHECK: fcvtl2 v1.4s, v0.8h
218 ; CHECK: fcvtl v0.4s, v0.4h
219 %1 = fpext <8 x half> %a to <8 x float>
; Extends <8 x half> to <8 x double> with fpext.
223 define <8 x double> @h_to_d(<8 x half> %a) {
224 ; CHECK-LABEL: h_to_d:
237 %1 = fpext <8 x half> %a to <8 x double>
; Bitcasts <8 x i16> to <8 x half>; the expected output is a plain register
; copy from v1 to v0.
; NOTE(review): the unnamed leading float parameter presumably exists to push
; %a out of v0 so that a copy is required -- confirm against the calling
; convention.
242 define <8 x half> @bitcast_i_to_h(float, <8 x i16> %a) {
243 ; CHECK-LABEL: bitcast_i_to_h:
244 ; CHECK: mov v0.16b, v1.16b
245 %2 = bitcast <8 x i16> %a to <8 x half>
; Bitcasts <8 x half> to <8 x i16>; the expected output is a plain register
; copy from v1 to v0.
; NOTE(review): the unnamed leading float parameter presumably exists to push
; %a out of v0 so that a copy is required -- confirm against the calling
; convention.
249 define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
250 ; CHECK-LABEL: bitcast_h_to_i:
251 ; CHECK: mov v0.16b, v1.16b
252 %2 = bitcast <8 x half> %a to <8 x i16>
; Signed conversion of <8 x i8> to <8 x half> (sitofp).
; Expected sequence: sshll widens i8 -> i16, sshll2/sshll widen each half to
; i32, scvtf converts both 4s vectors to float, fcvtn narrows each to 4h, and
; a final ins places the second narrowed result in the upper 64 bits of v0.
257 define <8 x half> @sitofp_i8(<8 x i8> %a) #0 {
258 ; CHECK-LABEL: sitofp_i8:
259 ; CHECK-NEXT: sshll v[[REG1:[0-9]+]].8h, v0.8b, #0
260 ; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
261 ; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
262 ; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
263 ; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
264 ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
265 ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
266 ; CHECK: ins v0.d[1], v[[LOREG]].d[0]
267 %1 = sitofp <8 x i8> %a to <8 x half>
; Signed conversion of <8 x i16> to <8 x half> (sitofp).
; Expected sequence: sshll2/sshll widen each half of the input to i32, scvtf
; converts both 4s vectors to float, fcvtn narrows each to 4h, and a final ins
; places the second narrowed result in the upper 64 bits of v0.
272 define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
273 ; CHECK-LABEL: sitofp_i16:
274 ; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
275 ; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
276 ; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
277 ; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
278 ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
279 ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
280 ; CHECK: ins v0.d[1], v[[LOREG]].d[0]
281 %1 = sitofp <8 x i16> %a to <8 x half>
; Signed conversion of <8 x i32> to <8 x half> (sitofp).
; Expected sequence: scvtf on each 4s input register (v0, v1), fcvtn of each
; result to 4h, then an ins that places the second narrowed result in the
; upper 64 bits of v0.
286 define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
287 ; CHECK-LABEL: sitofp_i32:
288 ; CHECK-DAG: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
289 ; CHECK-DAG: scvtf [[OP2:v[0-9]+\.4s]], v1.4s
290 ; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
291 ; CHECK-DAG: fcvtn v0.4h, [[OP1]]
292 ; CHECK: ins v0.d[1], v[[REG]].d[0]
293 %1 = sitofp <8 x i32> %a to <8 x half>
; Signed conversion of <8 x i64> to <8 x half> (sitofp).
; The directives visible here cover input registers v0 and v1: scvtf on each
; 2d vector, fcvtn/fcvtn2 packing both results into one 4s vector, then fcvtn
; of that vector to 4h in v0.
298 define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
299 ; CHECK-LABEL: sitofp_i64:
300 ; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
301 ; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
302 ; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
303 ; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
304 ; CHECK: fcvtn v0.4h, [[OP3]].4s
305 %1 = sitofp <8 x i64> %a to <8 x half>
; Unsigned conversion of <8 x i8> to <8 x half> (uitofp).
; Expected sequence: ushll widens i8 -> i16, ushll2/ushll widen each half to
; i32, ucvtf converts both 4s vectors to float, fcvtn narrows each to 4h, and
; a final ins places the second narrowed result in the upper 64 bits of v0.
309 define <8 x half> @uitofp_i8(<8 x i8> %a) #0 {
310 ; CHECK-LABEL: uitofp_i8:
311 ; CHECK-NEXT: ushll v[[REG1:[0-9]+]].8h, v0.8b, #0
312 ; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
313 ; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
314 ; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
315 ; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
316 ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
317 ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
318 ; CHECK: ins v0.d[1], v[[LOREG]].d[0]
319 %1 = uitofp <8 x i8> %a to <8 x half>
; Unsigned conversion of <8 x i16> to <8 x half> (uitofp).
; Expected sequence: ushll2/ushll widen each half of the input to i32, ucvtf
; converts both 4s vectors to float, fcvtn narrows each to 4h, and a final ins
; places the second narrowed result in the upper 64 bits of v0.
324 define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
325 ; CHECK-LABEL: uitofp_i16:
326 ; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
327 ; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
328 ; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
329 ; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
330 ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
331 ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
332 ; CHECK: ins v0.d[1], v[[LOREG]].d[0]
333 %1 = uitofp <8 x i16> %a to <8 x half>
; Unsigned conversion of <8 x i32> to <8 x half> (uitofp).
; Expected sequence: ucvtf on each 4s input register (v0, v1), fcvtn of each
; result to 4h, then an ins that places the second narrowed result in the
; upper 64 bits of v0.
338 define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
339 ; CHECK-LABEL: uitofp_i32:
340 ; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
341 ; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.4s]], v1.4s
342 ; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
343 ; CHECK-DAG: fcvtn v0.4h, [[OP1]]
344 ; CHECK: ins v0.d[1], v[[REG]].d[0]
345 %1 = uitofp <8 x i32> %a to <8 x half>
; Unsigned conversion of <8 x i64> to <8 x half> (uitofp).
; The directives visible here cover input registers v0 and v1: ucvtf on each
; 2d vector, fcvtn/fcvtn2 packing both results into one 4s vector, then fcvtn
; of that vector to 4h in v0.
350 define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
351 ; CHECK-LABEL: uitofp_i64:
352 ; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
353 ; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
354 ; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
355 ; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
356 ; CHECK: fcvtn v0.4h, [[OP3]].4s
357 %1 = uitofp <8 x i64> %a to <8 x half>
; Inserts scalar half %a into lane 0 of an otherwise undef <8 x half> vector
; and stores the vector through %b. The expected output stores q0 to [x0] on
; the line immediately after the function label.
361 define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
362 ; CHECK-LABEL: test_insert_at_zero:
363 ; CHECK-NEXT: str q0, [x0]
365 %1 = insertelement <8 x half> undef, half %a, i64 0
366 store <8 x half> %1, <8 x half>* %b, align 4
; Signed conversion of <8 x half> to <8 x i8> (fptosi).
; Expected sequence: fcvtl/fcvtl2 extend each half of the input to 4s, fcvtzs
; converts both to integers, xtn/xtn2 narrow them into one 8h vector, and a
; final xtn narrows that vector to 8b in v0.
370 define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
371 ; CHECK-LABEL: fptosi_i8:
372 ; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
373 ; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
374 ; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
375 ; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
376 ; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
377 ; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
378 ; CHECK-NEXT: xtn v0.8b, [[I16]].8h
380 %1 = fptosi<8 x half> %a to <8 x i8>
; Signed conversion of <8 x half> to <8 x i16> (fptosi).
; Expected sequence: fcvtl/fcvtl2 extend each half of the input to 4s, fcvtzs
; converts both to integers, and xtn/xtn2 narrow them into one 8h vector.
384 define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
385 ; CHECK-LABEL: fptosi_i16:
386 ; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
387 ; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
388 ; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
389 ; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
390 ; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
391 ; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
393 %1 = fptosi<8 x half> %a to <8 x i16>
; Unsigned conversion of <8 x half> to <8 x i8> (fptoui).
; Expected sequence: fcvtl/fcvtl2 extend each half of the input to 4s, fcvtzu
; converts both to integers, xtn/xtn2 narrow them into one 8h vector, and a
; final xtn narrows that vector to 8b in v0.
397 define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
398 ; CHECK-LABEL: fptoui_i8:
399 ; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
400 ; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
401 ; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
402 ; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
403 ; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
404 ; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
405 ; CHECK-NEXT: xtn v0.8b, [[I16]].8h
407 %1 = fptoui<8 x half> %a to <8 x i8>
; Unsigned conversion of <8 x half> to <8 x i16> (fptoui).
; Expected sequence: fcvtl/fcvtl2 extend each half of the input to 4s, fcvtzu
; converts both to integers, and xtn/xtn2 narrow them into one 8h vector.
411 define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
412 ; CHECK-LABEL: fptoui_i16:
413 ; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
414 ; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
415 ; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
416 ; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
417 ; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
418 ; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
420 %1 = fptoui<8 x half> %a to <8 x i16>
; fcmp une (unordered or not equal) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
424 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
425 define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 {
426 %1 = fcmp une <8 x half> %a, %b
; fcmp ueq (unordered or equal) on two <8 x half> vectors, yielding <8 x i1>.
; No output directives are attached; the note below gives the reason.
430 ; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped.
431 define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
432 %1 = fcmp ueq <8 x half> %a, %b
; fcmp ugt (unordered or greater than) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
436 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
437 define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 {
438 %1 = fcmp ugt <8 x half> %a, %b
; fcmp uge (unordered or greater than or equal) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
442 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
443 define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 {
444 %1 = fcmp uge <8 x half> %a, %b
; fcmp ult (unordered or less than) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
448 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
449 define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 {
450 %1 = fcmp ult <8 x half> %a, %b
; fcmp ule (unordered or less than or equal) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
454 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
455 define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 {
456 %1 = fcmp ule <8 x half> %a, %b
; fcmp uno (unordered: either operand is NaN) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
460 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
461 define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
462 %1 = fcmp uno <8 x half> %a, %b
; fcmp one (ordered and not equal) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
466 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
467 define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 {
468 %1 = fcmp one <8 x half> %a, %b
; fcmp oeq (ordered and equal) on two <8 x half> vectors, yielding <8 x i1>.
; No output directives are attached; the note below gives the reason.
472 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
473 define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 {
474 %1 = fcmp oeq <8 x half> %a, %b
; fcmp ogt (ordered and greater than) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
478 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
479 define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 {
480 %1 = fcmp ogt <8 x half> %a, %b
; fcmp oge (ordered and greater than or equal) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
484 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
485 define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 {
486 %1 = fcmp oge <8 x half> %a, %b
; fcmp olt (ordered and less than) on two <8 x half> vectors, yielding
; <8 x i1>. No output directives are attached; the note below gives the reason.
490 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
491 define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 {
492 %1 = fcmp olt <8 x half> %a, %b
; fcmp ole (ordered and less than or equal) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
496 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
497 define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 {
498 %1 = fcmp ole <8 x half> %a, %b
; fcmp ord (ordered: neither operand is NaN) on two <8 x half> vectors,
; yielding <8 x i1>. No output directives are attached; the note below gives
; the reason.
502 ; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
503 define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 {
504 %1 = fcmp ord <8 x half> %a, %b
; Attribute group #0: marks every function declared with #0 as nounwind.
508 attributes #0 = { nounwind }