From 2a9af9f18eac90b0de739b6ceddf6c2209086303 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 13 Sep 2013 13:46:57 +0000 Subject: [PATCH] [ARMv8] Change hasV8Fp to hasFPARMv8, and other command line options to be more consistent. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 2 +- lib/Target/ARM/ARMAsmPrinter.cpp | 8 ++--- lib/Target/ARM/ARMBuildAttrs.h | 4 +-- lib/Target/ARM/ARMISelLowering.cpp | 4 +-- lib/Target/ARM/ARMInstrInfo.td | 4 +-- lib/Target/ARM/ARMInstrVFP.td | 32 +++++++++---------- lib/Target/ARM/ARMSubtarget.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 6 ++-- .../ARM/2010-09-29-mc-asm-header-test.ll | 20 ++++++------ test/CodeGen/ARM/vminmaxnm.ll | 26 +++++++-------- test/CodeGen/ARM/vsel.ll | 2 +- test/MC/ARM/invalid-neon-v8.s | 2 +- 12 files changed, 56 insertions(+), 56 deletions(-) diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 067ad13ab2c..5752005eacf 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,7 +45,7 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", "Enable VFP4 instructions", [FeatureVFP3, FeatureFP16]>; -def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP", +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", "Enable ARMv8 FP", [FeatureVFP4]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ff43d46327c..ee01fcfb669 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -808,7 +808,7 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasV8FP()) + if (Subtarget->hasFPARMv8()) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-fp-armv8"); else if (Subtarget->hasVFP4()) @@ -821,10 +821,10 @@ void ARMAsmPrinter::emitAttributes() { emitFPU = false; } - /* V8FP + .fpu */ - if (Subtarget->hasV8FP()) { + /* FPARMv8 + .fpu */ + if (Subtarget->hasFPARMv8()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, - ARMBuildAttrs::AllowV8FPA); + ARMBuildAttrs::AllowFPARMv8A); if (emitFPU) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "fp-armv8"); /* VFPv4 + .fpu */ diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index f614dcaa79e..1671732d9f0 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -114,8 +114,8 @@ namespace ARMBuildAttrs { AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 - AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted - AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 + AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted + AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 94270eda5ff..c83f7b194ae 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3258,7 +3258,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) - if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || @@ -3279,7 +3279,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { FPCCToARMCC(CC, CondCode, CondCode2); // Try to generate VSEL on ARMv8. - if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { // We can select VMAXNM/VMINNM from a compare followed by a select with the // same operands, as follows: diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b488f264fa5..ef34e2662e6 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -208,8 +208,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; -def HasV8FP : Predicate<"Subtarget->hasV8FP()">, - AssemblerPredicate<"FeatureV8FP", "V8FP">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 34d83c6446d..f0da06d72a8 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -340,13 +340,13 @@ multiclass vsel_inst opc, int CC> { (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; def D : ADbInp<0b11100, opc, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; } } @@ -362,13 +362,13 @@ multiclass vmaxmin_inst { (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; def D : ADbInp<0b11101, 0b00, opc, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; } } @@ -538,7 +538,7 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sm; @@ -550,7 +550,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -565,7 +565,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sm; @@ -577,7 +577,7 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -594,21 +594,21 @@ multiclass vcvt_inst rm> { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { bits<5> Dm; let Inst{17-16} = rm; @@ -622,7 +622,7 @@ multiclass vcvt_inst rm> { def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { bits<5> Dm; let Inst{17-16} = rm; @@ -658,14 +658,14 @@ multiclass vrint_inst_zrx { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } @@ -685,13 +685,13 @@ multiclass vrint_inst_anpm rm> { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e9254c3d90c..a227718e306 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -81,7 +81,7 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; - HasV8FP = false; + HasFPARMv8 = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index b13ff9ddeb6..65278a5846f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -48,12 +48,12 @@ protected: bool HasV7Ops; bool HasV8Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; - bool HasV8FP; + bool HasFPARMv8; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -246,7 +246,7 @@ public: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } - bool hasV8FP() const { return HasV8FP; } + bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll index 174a641c6e1..b2717130add 100644 --- a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll +++ b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+v8fp | FileCheck %s --check-prefix=V8-V8FP +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8 | FileCheck %s --check-prefix=V8-FPARMv8 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+neon | FileCheck %s --check-prefix=V8-NEON -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+v8fp -mattr=+neon | FileCheck %s --check-prefix=V8-V8FP-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8 -mattr=+neon | FileCheck %s --check-prefix=V8-FPARMv8-NEON ; This tests that MC/asm header conversion is smooth ; ; V7: .syntax unified @@ -20,19 +20,19 @@ ; Vt8: .syntax unified ; Vt8: .eabi_attribute 6, 14 -; V8-V8FP: .syntax unified -; V8-V8FP: .eabi_attribute 6, 14 -; V8-V8FP: .eabi_attribute 10, 7 -; V8-V8FP: .fpu fp-armv8 +; V8-FPARMv8: .syntax unified +; V8-FPARMv8: .eabi_attribute 6, 14 +; V8-FPARMv8: .eabi_attribute 10, 7 +; V8-FPARMv8: .fpu fp-armv8 ; V8-NEON: .syntax unified ; V8-NEON: .eabi_attribute 6, 14 ; V8-NEON: .eabi_attribute 12, 3 -; V8-V8FP-NEON: .syntax unified -; V8-V8FP-NEON: .eabi_attribute 6, 14 -; V8-V8FP-NEON: .fpu neon-fp-armv8 -; V8-V8FP-NEON: .eabi_attribute 10, 7 +; V8-FPARMv8-NEON: .syntax unified +; V8-FPARMv8-NEON: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON: .fpu neon-fp-armv8 +; V8-FPARMv8-NEON: .eabi_attribute 10, 7 define i32 @f(i64 %z) { ret i32 0 diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll index fdf0c6a7627..f6ce64c54a3 100644 --- a/test/CodeGen/ARM/vminmaxnm.ll +++ b/test/CodeGen/ARM/vminmaxnm.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s -; RUN: llc < %s -mtriple armv8 -mattr=+neon,+v8fp -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK: vmaxnmq @@ -37,44 +37,44 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind { ret <2 x float> %tmp3 } -define float @v8fp_vminnm_o(float %a, float %b) { -; CHECK-FAST: v8fp_vminnm_o +define float @fp-armv8_vminnm_o(float %a, float %b) { +; CHECK-FAST: fp-armv8_vminnm_o ; CHECK-FAST-NOT: vcmp ; CHECK-FAST: vminnm.f32 -; CHECK: v8fp_vminnm_o +; CHECK: fp-armv8_vminnm_o ; CHECK-NOT: vminnm.f32 %cmp = fcmp olt float %a, %b %cond = select i1 %cmp, float %a, float %b ret float %cond } -define float @v8fp_vminnm_u(float %a, float %b) { -; CHECK-FAST: v8fp_vminnm_u +define float @fp-armv8_vminnm_u(float %a, float %b) { +; CHECK-FAST: fp-armv8_vminnm_u ; CHECK-FAST-NOT: vcmp ; CHECK-FAST: vminnm.f32 -; CHECK: v8fp_vminnm_u +; CHECK: fp-armv8_vminnm_u ; CHECK-NOT: vminnm.f32 %cmp = fcmp ult float %a, %b %cond = select i1 %cmp, float %a, float %b ret float %cond } -define float @v8fp_vmaxnm_o(float %a, float %b) { -; CHECK-FAST: v8fp_vmaxnm_o +define float @fp-armv8_vmaxnm_o(float %a, float %b) { +; CHECK-FAST: fp-armv8_vmaxnm_o ; CHECK-FAST-NOT: vcmp ; CHECK-FAST: vmaxnm.f32 -; CHECK: v8fp_vmaxnm_o +; CHECK: fp-armv8_vmaxnm_o ; CHECK-NOT: vmaxnm.f32 %cmp = fcmp ogt float %a, %b %cond = select i1 %cmp, float %a, float %b ret float %cond } -define float @v8fp_vmaxnm_u(float %a, float %b) { -; CHECK-FAST: v8fp_vmaxnm_u +define float @fp-armv8_vmaxnm_u(float %a, float %b) { +; CHECK-FAST: fp-armv8_vmaxnm_u ; CHECK-FAST-NOT: vcmp ; CHECK-FAST: vmaxnm.f32 -; CHECK: v8fp_vmaxnm_u +; CHECK: fp-armv8_vmaxnm_u ; CHECK-NOT: vmaxnm.f32 %cmp = fcmp ugt float %a, %b %cond = select i1 %cmp, float %a, float %b diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll index f4ee800f6fd..7e1f7146fd1 100644 --- a/test/CodeGen/ARM/vsel.ll +++ b/test/CodeGen/ARM/vsel.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+v8fp -float-abi=hard | FileCheck %s +; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s @varfloat = global float 0.0 @vardouble = global double 0.0 define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) { diff --git a/test/MC/ARM/invalid-neon-v8.s b/test/MC/ARM/invalid-neon-v8.s index bdcb6fbe779..06406f3f9ca 100644 --- a/test/MC/ARM/invalid-neon-v8.s +++ b/test/MC/ARM/invalid-neon-v8.s @@ -8,7 +8,7 @@ vmaxnmge.f64.f64 s4, d5, q1 @ CHECK: error: instruction 'vmaxnm' is not predicable, but condition code specified vcvta.s32.f32 s1, s2 -@ CHECK: error: instruction requires: V8FP +@ CHECK: error: instruction requires: FPARMv8 vcvtp.u32.f32 s1, d2 @ CHECK: error: invalid operand for instruction vcvtp.f32.u32 d1, q2 -- 2.34.1