From 8fb8da13e0d028aabf152edb15b31559ee278d97 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 16 Dec 2015 11:35:44 +0000 Subject: [PATCH] [ARM] Add ARMv8.2-A FP16 scalar instructions ARMv8.2-A adds 16-bit floating point versions of all existing VFP floating-point instructions. This is an optional extension, so all of these instructions require the FeatureFullFP16 subtarget feature. The assembly for these instructions uses S registers (AArch32 does not have H registers), but the instructions have ".f16" type specifiers rather than ".f32" or ".f64". The top 16 bits of each source register are ignored, and the top 16 bits of the destination register are set to zero. These instructions are mostly the same as the 32- and 64-bit versions, but they use coprocessor 9 rather than 10 and 11. Two new instructions, VMOVX and VINS, have been added to allow packing and extracting two 16-bit floats stored in the top and bottom halves of an S register. New fixup kinds have been added for the PC-relative load and store instructions, but no ELF relocations have been added as they have a range of 512 bytes. 
Differential Revision: http://reviews.llvm.org/D15038 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 134 ++++++ lib/Target/ARM/ARMInstrInfo.td | 15 + lib/Target/ARM/ARMInstrVFP.td | 383 ++++++++++++++++++ lib/Target/ARM/ARMSchedule.td | 12 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 44 +- .../ARM/Disassembler/ARMDisassembler.cpp | 23 ++ lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 28 ++ lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3 + .../ARM/MCTargetDesc/ARMAddressingModes.h | 51 ++- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 41 ++ lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 7 + .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 49 ++- test/MC/ARM/fullfp16-neg.s | 189 +++++++++ test/MC/ARM/fullfp16.s | 257 ++++++++++++ test/MC/Disassembler/ARM/fullfp16-arm-neg.txt | 188 +++++++++ test/MC/Disassembler/ARM/fullfp16-arm.txt | 186 +++++++++ .../Disassembler/ARM/fullfp16-thumb-neg.txt | 186 +++++++++ test/MC/Disassembler/ARM/fullfp16-thumb.txt | 186 +++++++++ 18 files changed, 1976 insertions(+), 6 deletions(-) create mode 100644 test/MC/ARM/fullfp16-neg.s create mode 100644 test/MC/ARM/fullfp16.s create mode 100644 test/MC/Disassembler/ARM/fullfp16-arm-neg.txt create mode 100644 test/MC/Disassembler/ARM/fullfp16-arm.txt create mode 100644 test/MC/Disassembler/ARM/fullfp16-thumb-neg.txt create mode 100644 test/MC/Disassembler/ARM/fullfp16-thumb.txt diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index e79608d360c..50bb9af71da 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1495,6 +1495,32 @@ class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, let D = VFPNeonDomain; } +class AHI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + list Predicates = [HasFullFP16]; + + // Instruction operands. 
+ bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + // VFP Load / store multiple pseudo instructions. class PseudoVFPLdStM pattern> @@ -1817,6 +1843,114 @@ class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{22} = Sd{0}; } +// Half precision, unary, predicated +class AHuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, unary, non-predicated +class AHuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, binary +class AHbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Half precision, binary, not predicated +class AHbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // VFP conversion instructions class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2aa9475e6f4..292d5d62fc4 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -981,6 +981,21 @@ def addrmode5_pre : AddrMode5 { let PrintMethod = "printAddrMode5Operand"; } +// addrmode5fp16 := reg +/- imm8*2 +// +def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; } +class AddrMode5FP16 : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode5FP16OpValue"; + let DecoderMethod = "DecodeAddrMode5FP16Operand"; + let ParserMatchClass = AddrMode5FP16AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); +} + +def addrmode5fp16 : AddrMode5FP16 { + let PrintMethod = "printAddrMode5FP16Operand"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 050cd1a445a..8a175fdaefa 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -30,6 +30,18 @@ def FPImmOperand : AsmOperandClass { let ParserMethod = "parseFPImm"; } +def vfp_f16imm : Operand, + PatLeaf<(f16 fpimm), [{ + return ARM_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXFormgetValueAPF(); + uint32_t enc = ARM_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} + def vfp_f32imm : Operand, PatLeaf<(f32 fpimm), [{ 
return ARM_AM::getFP32Imm(N->getValueAPF()) != -1; @@ -98,6 +110,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), let D = VFPNeonDomain; } +def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr), + IIC_fpLoad16, "vldr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), @@ -112,6 +129,11 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), let D = VFPNeonDomain; } +def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr), + IIC_fpStore16, "vstr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -295,6 +317,12 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VADDH : AHbI<0b11100, 0b11, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -311,6 +339,12 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VSUBH : AHbI<0b11100, 0b11, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -323,6 +357,12 @@ def VDIVS : ASbI<0b11101, 0b00, 0, 0, IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VDIVH : AHbI<0b11101, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", + []>; + let 
TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -339,6 +379,12 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VMULH : AHbI<0b11100, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", + []>; + def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", @@ -353,9 +399,20 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +def VNMULH : AHbI<0b11100, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", + []>; + multiclass vsel_inst opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", Uses = [CPSR], AddedComplexity = 4 in { + def H : AHbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), @@ -378,6 +435,12 @@ defm VSELVS : vsel_inst<"vs", 0b01, 6>; multiclass vmaxmin_inst { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def H : AHbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11101, 0b00, opc, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), @@ -418,6 +481,12 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", + []>; + + // FIXME: Verify encoding after integrated assembler is working. 
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), @@ -432,6 +501,11 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } + +def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", + []>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -452,6 +526,11 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } +def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm", + []>; + let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), @@ -473,6 +552,14 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} + // FIXME: Verify encoding after integrated assembler is working. def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), @@ -493,6 +580,14 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } + +def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} } // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, @@ -627,6 +722,22 @@ def : Pat<(f64 (f16_to_fp GPR:$a)), multiclass vcvt_inst rm, SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + + def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), @@ -715,7 +826,21 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm", + []>; + multiclass vrint_inst_zrx { + def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm", + []>, + Requires<[HasFullFP16]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", @@ -733,6 +858,9 @@ multiclass vrint_inst_zrx { let Inst{16} = op; } + def : InstAlias(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p)>, + Requires<[HasFullFP16]>; def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, Requires<[HasFPARMv8]>; @@ -748,6 +876,13 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; multiclass vrint_inst_anpm rm, SDPatternOperator node = null_frag> { 
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), @@ -787,6 +922,11 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; +def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm", + []>; + let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -795,6 +935,18 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; + +let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { +def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; + +def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; +} // PostEncoderMethod } // hasSideEffects //===----------------------------------------------------------------------===// @@ -966,6 +1118,44 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, let DecoderMethod = "DecodeVMOVSRR"; } +// Move H->R, clearing top 16 bits +def VMOVRH : AVConv2I<0b11100001, 0b1001, + (outs GPR:$Rt), (ins SPR:$Sn), + IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<4> Rt; + bits<5> Sn; + + // Encode instruction operands. 
+ let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + +// Move R->H, clearing top 16 bits +def VMOVHR : AVConv4I<0b11100000, 0b1001, + (outs SPR:$Sn), (ins GPR:$Rt), + IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<5> Sn; + bits<4> Rt; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR // FMRRS: SPR -> GPR @@ -1011,6 +1201,25 @@ class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IHs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list pattern> + : AVConv1I { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", @@ -1043,6 +1252,13 @@ def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOS (VLDRS addrmode5:$a))>; +def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", + []> { + let Inst{7} = 1; // s32 +} + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", @@ -1075,6 +1291,13 @@ def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOS (VLDRS addrmode5:$a))>; +def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 
0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", + []> { + let Inst{7} = 0; // u32 +} + // FP -> Int: class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1113,6 +1336,25 @@ class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IsH_Encode opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list pattern> + : AVConv1I { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + // Always set Z bit in the instruction, i.e. "round towards zero" variants. def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), @@ -1147,6 +1389,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; +def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", @@ -1180,6 +1429,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; +def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. 
@@ -1197,6 +1453,13 @@ def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let Inst{7} = 0; // Z bit } +def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} + def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", @@ -1210,6 +1473,13 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { let Inst{7} = 0; // Z bit } + +def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} } // Convert between floating-point and fixed-point @@ -1249,6 +1519,26 @@ class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, let Predicates = [HasVFP2, HasDPVFP]; } +def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { @@ -1299,6 +1589,26 @@ def VTOULD : 
AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, // Fixed-Point to FP: +def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { @@ -1373,6 +1683,13 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, let D = VFPNeonA8Domain; } +def VMLAH : AHbI<0b11100, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1400,6 +1717,13 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, let D = VFPNeonA8Domain; } +def VMLSH : AHbI<0b11100, 0b00, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, 
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1427,6 +1751,13 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, let D = VFPNeonA8Domain; } +def VNMLAH : AHbI<0b11100, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1453,6 +1784,13 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, let D = VFPNeonA8Domain; } +def VNMLSH : AHbI<0b11100, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1482,6 +1820,13 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, // VFP pipelines. } +def VFMAH : AHbI<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1517,6 +1862,13 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, // VFP pipelines. } +def VFMSH : AHbI<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1559,6 +1911,13 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, // VFP pipelines. 
} +def VFNMAH : AHbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1600,6 +1959,13 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, // VFP pipelines. } +def VFNMSH : AHbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1780,6 +2146,23 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), let Inst{7-4} = 0b0000; let Inst{3-0} = imm{3-0}; } + +def FCONSTH : VFPAI<(outs SPR:$Sd), (ins vfp_f16imm:$imm), + VFPMiscFrm, IIC_fpUNA16, + "vmov", ".f16\t$Sd, $imm", + []>, Requires<[HasFullFP16]> { + bits<5> Sd; + bits<8> imm; + + let Inst{27-23} = 0b11101; + let Inst{22} = Sd{0}; + let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Sd{4-1}; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; +} } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 528c4ec7378..c485e5111be 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -186,38 +186,50 @@ def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; +def IIC_fpUNA16 : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; def IIC_fpUNA64 : InstrItinClass; +def IIC_fpCMP16 : InstrItinClass; def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : 
InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; def IIC_fpCVTSH : InstrItinClass; def IIC_fpCVTHS : InstrItinClass; +def IIC_fpCVTIH : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; +def IIC_fpCVTHI : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; def IIC_fpMOVIS : InstrItinClass; def IIC_fpMOVID : InstrItinClass; def IIC_fpMOVSI : InstrItinClass; def IIC_fpMOVDI : InstrItinClass; +def IIC_fpALU16 : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; +def IIC_fpMUL16 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; +def IIC_fpMAC16 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC16 : InstrItinClass; def IIC_fpFMAC32 : InstrItinClass; def IIC_fpFMAC64 : InstrItinClass; +def IIC_fpDIV16 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; +def IIC_fpSQRT16 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; +def IIC_fpLoad16 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; def IIC_fpLoad_m : InstrItinClass; def IIC_fpLoad_mu : InstrItinClass; +def IIC_fpStore16 : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; def IIC_fpStore_m : InstrItinClass; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 73f33087756..72c98f01b38 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1183,6 +1183,20 @@ public: return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || Val == INT32_MIN; } + bool isAddrMode5FP16() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. 
+ if (isImm() && !isa(getImm())) + return true; + if (!isMem() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return false; + // Immediate offset in range [-510, 510] and a multiple of 2. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || Val == INT32_MIN; + } bool isMemTBB() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) @@ -2145,6 +2159,28 @@ public: Inst.addOperand(MCOperand::createImm(Val)); } + void addAddrMode5FP16Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::createExpr(getImm())); + Inst.addOperand(MCOperand::createImm(0)); + return; + } + + // The lower bit is always zero and as such is not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 2 : 0; + ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM5FP16Opc(AddSub, Val); + Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::createImm(Val)); + } + void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If we have an immediate that's not a constant, treat it as a label @@ -4973,7 +5009,8 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { // vmov.i{8|16|32|64} , #imm ARMOperand &TyOp = static_cast(*Operands[2]); bool isVmovf = TyOp.isToken() && - (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64"); + (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64" || + TyOp.getToken() == ".f16"); ARMOperand &Mnemonic = static_cast(*Operands[0]); bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" || Mnemonic.getToken() == "fconsts"); @@ -5265,7 +5302,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || - Mnemonic.startswith("vsel")) + Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5369,7 +5406,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || - (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { + (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || + Mnemonic == "vmovx" || Mnemonic == "vins") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8042fcd1301..a05111e4ceb 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -222,6 +222,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, @@ -2183,6 +2185,7 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. 
unsigned U = fieldFromInstruction(Val, 8, 1); unsigned imm = fieldFromInstruction(Val, 0, 8); @@ -2197,6 +2200,26 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, return S; } +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. + unsigned U = fieldFromInstruction(Val, 8, 1); + unsigned imm = fieldFromInstruction(Val, 0, 8); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (U) + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::add, imm))); + else + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::sub, imm))); + + return S; +} + static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index c639540b6c8..11330877c0e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -644,6 +644,34 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template +void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); + return; + } + + O << markup(""); + } + O << "]" << markup(">"); +} + void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3927c9f8bfd..03db55569a2 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -74,6 +74,9 @@ public: template void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template + void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b03cada9a64..3959eab966a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -486,7 +486,7 @@ namespace ARM_AM { // addrmode5 := reg +/- imm8*4 // // The first operand is always a Reg. The second operand encodes the - // operation in bit 8 and the immediate in bits 0-7. + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -500,6 +500,29 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? sub : add; } + //===--------------------------------------------------------------------===// + // Addressing Mode #5 FP16 + //===--------------------------------------------------------------------===// + // + // This is used for coprocessor instructions, such as 16-bit FP load/stores. 
+ // + // addrmode5fp16 := reg +/- imm8*2 + // + // The first operand is always a Reg. The second operand encodes the + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. + + /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field. + static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { + return AM5Opc & 0xFF; + } + static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1) ? sub : add; + } + //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// @@ -650,6 +673,32 @@ namespace ARM_AM { return FPUnion.F; } + /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. + static inline int getFP16Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 + int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x3f) + return -1; + Mantissa >>= 6; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP16Imm(const APFloat &FPImm) { + return getFP16Imm(FPImm.bitcastToAPInt()); + } + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index fa52c9354c1..9b60ce57b90 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -62,6 +62,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 0, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -105,6 +109,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 8, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -624,6 +632,37 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } + case ARM::fixup_arm_pcrel_9: + Value = Value - 4; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + // Fall through. + case ARM::fixup_t2_pcrel_9: { + // Offset by 4, adjusted by two due to the half-word ordering of thumb. + Value = Value - 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // These values don't encode the low bit since it's always zero. 
+ if (Ctx && (Value & 1)) { + Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup"); + return 0; + } + Value >>= 1; + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords + // swapped. + if (Kind == ARM::fixup_t2_pcrel_9) + return swapHalfWords(Value, IsLittleEndian); + + return Value; + } } } @@ -695,6 +734,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_pcrel_9: case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: @@ -708,6 +748,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_t2_condbranch: case ARM::fixup_t2_uncondbranch: case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_pcrel_9: case ARM::fixup_t2_adr_pcrel_12: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 46ba57170db..51dbe1449b6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -33,6 +33,13 @@ enum Fixups { // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for // the short-swapped encoding of Thumb2 instructions. fixup_t2_pcrel_10, + // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses + // used in VFP instructions where bit 0 is not encoded (so it's encoded as an + // 8-bit immediate). fixup_arm_pcrel_9, + // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for + // the short-swapped encoding of Thumb2 instructions. fixup_t2_pcrel_9, // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol // addresses where the lower 2 bits are not encoded (so it's encoded as an // 8-bit immediate).
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index b88578309f0..a8635ff3403 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -255,11 +255,16 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand. + /// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + /// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. + uint32_t getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// getCCOutOpValue - Return encoding of the 's' bit. unsigned getCCOutOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, @@ -1252,7 +1257,7 @@ getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, return (MO.getImm() >> 2); } -/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. +/// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t ARMMCCodeEmitter:: getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, @@ -1292,6 +1297,46 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. +uint32_t ARMMCCodeEmitter:: +getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // {12-9} = reg + // {8} = (U)nsigned (add == '1', sub == '0') + // {7-0} = imm8 + unsigned Reg, Imm8; + bool isAdd; + // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) { + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. + Imm8 = 0; + isAdd = false; // 'U' bit is handled as part of the fixup. + + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind; + if (isThumb2(STI)) + Kind = MCFixupKind(ARM::fixup_t2_pcrel_9); + else + Kind = MCFixupKind(ARM::fixup_arm_pcrel_9); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + } else { + EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI); + isAdd = ARM_AM::getAM5FP16Op(Imm8) == ARM_AM::add; + } + + uint32_t Binary = ARM_AM::getAM5FP16Offset(Imm8); + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (isAdd) + Binary |= (1 << 8); + Binary |= (Reg << 9); + return Binary; +} + unsigned ARMMCCodeEmitter:: getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, diff --git a/test/MC/ARM/fullfp16-neg.s b/test/MC/ARM/fullfp16-neg.s new file mode 100644 index 00000000000..4ac4683428f --- /dev/null +++ b/test/MC/ARM/fullfp16-neg.s @@ -0,0 +1,189 @@ +@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=-fullfp16 -show-encoding < %s 2>&1 | FileCheck %s +@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=-fullfp16,+thumb-mode -show-encoding < %s 2>&1 | FileCheck %s + + vadd.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vsub.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vdiv.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vmul.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vnmul.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vmla.f16 s1, s2, s0 +@ CHECK: error: instruction requires: + + vmls.f16 s1, s2, s0 +@ CHECK: error: instruction requires: + + vnmla.f16 s1, s2, s0 +@ CHECK: error: instruction requires: + + vnmls.f16 s1, s2, s0 +@ CHECK: error: instruction requires: + + vcmp.f16 s0, s1 +@ CHECK: error:
instruction requires: + + vcmp.f16 s2, #0 +@ CHECK: error: instruction requires: + + vcmpe.f16 s1, s0 +@ CHECK: error: instruction requires: + + vcmpe.f16 s0, #0 +@ CHECK: error: instruction requires: + + vabs.f16 s0, s0 +@ CHECK: error: instruction requires: + + vneg.f16 s0, s0 +@ CHECK: error: instruction requires: + + vsqrt.f16 s0, s0 +@ CHECK: error: instruction requires: + + vcvt.f16.s32 s0, s0 + vcvt.f16.u32 s0, s0 + vcvt.s32.f16 s0, s0 + vcvt.u32.f16 s0, s0 +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + vcvtr.s32.f16 s0, s1 + vcvtr.u32.f16 s0, s1 +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + vcvt.f16.u32 s0, s0, #20 + vcvt.f16.u16 s0, s0, #1 + vcvt.f16.s32 s1, s1, #20 + vcvt.f16.s16 s17, s17, #1 + vcvt.u32.f16 s12, s12, #20 + vcvt.u16.f16 s28, s28, #1 + vcvt.s32.f16 s1, s1, #20 + vcvt.s16.f16 s17, s17, #1 +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + vcvta.s32.f16 s2, s3 +@ CHECK: error: instruction requires: + + vcvtn.s32.f16 s6, s23 +@ CHECK: error: instruction requires: + + vcvtp.s32.f16 s0, s4 +@ CHECK: error: instruction requires: + + vcvtm.s32.f16 s17, s8 +@ CHECK: error: instruction requires: + + vcvta.u32.f16 s2, s3 +@ CHECK: error: instruction requires: + + vcvtn.u32.f16 s6, s23 +@ CHECK: error: instruction requires: + + vcvtp.u32.f16 s0, s4 +@ CHECK: error: instruction requires: + + vcvtm.u32.f16 s17, s8 +@ CHECK: error: instruction requires: + + vselge.f16 s4, s1, s23 +@ CHECK: error: instruction requires: + + vselgt.f16 s0, s1, s0 +@ CHECK: error: instruction requires: + + vseleq.f16 s30, s28, s23 +@ CHECK: error: instruction requires: 
+ + vselvs.f16 s21, s16, s14 +@ CHECK: error: instruction requires: + + vmaxnm.f16 s5, s12, s0 +@ CHECK: error: instruction requires: + + vminnm.f16 s0, s0, s12 +@ CHECK: error: instruction requires: + + vrintz.f16 s3, s24 +@ CHECK: error: instruction requires: + + vrintr.f16 s0, s9 +@ CHECK: error: instruction requires: + + vrintx.f16 s10, s14 +@ CHECK: error: instruction requires: + + vrinta.f16 s12, s1 +@ CHECK: error: instruction requires: + + vrintn.f16 s12, s1 +@ CHECK: error: instruction requires: + + vrintp.f16 s12, s1 +@ CHECK: error: instruction requires: + + vrintm.f16 s12, s1 +@ CHECK: error: instruction requires: + + vfma.f16 s2, s7, s4 +@ CHECK: error: instruction requires: + + vfms.f16 s2, s7, s4 +@ CHECK: error: instruction requires: + + vfnma.f16 s2, s7, s4 +@ CHECK: error: instruction requires: + + vfnms.f16 s2, s7, s4 +@ CHECK: error: instruction requires: + + vmovx.f16 s2, s5 + vins.f16 s2, s5 +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + + vldr.16 s1, [pc, #6] + vldr.16 s2, [pc, #510] + vldr.16 s3, [pc, #-510] + vldr.16 s4, [r4, #-18] +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + + vstr.16 s1, [pc, #6] + vstr.16 s2, [pc, #510] + vstr.16 s3, [pc, #-510] + vstr.16 s4, [r4, #-18] +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: + + vmov.f16 s0, #1.0 +@ CHECK: error: instruction requires: + + vmov.f16 s1, r2 + vmov.f16 r3, s4 +@ CHECK: error: instruction requires: +@ CHECK: error: instruction requires: diff --git a/test/MC/ARM/fullfp16.s b/test/MC/ARM/fullfp16.s new file mode 100644 index 00000000000..bb7863c2353 --- /dev/null +++ b/test/MC/ARM/fullfp16.s @@ -0,0 +1,257 @@ +@ RUN: llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16 -show-encoding < %s | FileCheck %s --check-prefix=ARM +@ RUN: 
llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,+thumb-mode -show-encoding < %s | FileCheck %s --check-prefix=THUMB + + vadd.f16 s0, s1, s0 +@ ARM: vadd.f16 s0, s1, s0 @ encoding: [0x80,0x09,0x30,0xee] +@ THUMB: vadd.f16 s0, s1, s0 @ encoding: [0x30,0xee,0x80,0x09] + + vsub.f16 s0, s1, s0 +@ ARM: vsub.f16 s0, s1, s0 @ encoding: [0xc0,0x09,0x30,0xee] +@ THUMB: vsub.f16 s0, s1, s0 @ encoding: [0x30,0xee,0xc0,0x09] + + vdiv.f16 s0, s1, s0 +@ ARM: vdiv.f16 s0, s1, s0 @ encoding: [0x80,0x09,0x80,0xee] +@ THUMB: vdiv.f16 s0, s1, s0 @ encoding: [0x80,0xee,0x80,0x09] + + vmul.f16 s0, s1, s0 +@ ARM: vmul.f16 s0, s1, s0 @ encoding: [0x80,0x09,0x20,0xee] +@ THUMB: vmul.f16 s0, s1, s0 @ encoding: [0x20,0xee,0x80,0x09] + + vnmul.f16 s0, s1, s0 +@ ARM: vnmul.f16 s0, s1, s0 @ encoding: [0xc0,0x09,0x20,0xee] +@ THUMB: vnmul.f16 s0, s1, s0 @ encoding: [0x20,0xee,0xc0,0x09] + + vmla.f16 s1, s2, s0 +@ ARM: vmla.f16 s1, s2, s0 @ encoding: [0x00,0x09,0x41,0xee] +@ THUMB: vmla.f16 s1, s2, s0 @ encoding: [0x41,0xee,0x00,0x09] + + vmls.f16 s1, s2, s0 +@ ARM: vmls.f16 s1, s2, s0 @ encoding: [0x40,0x09,0x41,0xee] +@ THUMB: vmls.f16 s1, s2, s0 @ encoding: [0x41,0xee,0x40,0x09] + + vnmla.f16 s1, s2, s0 +@ ARM: vnmla.f16 s1, s2, s0 @ encoding: [0x40,0x09,0x51,0xee] +@ THUMB: vnmla.f16 s1, s2, s0 @ encoding: [0x51,0xee,0x40,0x09] + + vnmls.f16 s1, s2, s0 +@ ARM: vnmls.f16 s1, s2, s0 @ encoding: [0x00,0x09,0x51,0xee] +@ THUMB: vnmls.f16 s1, s2, s0 @ encoding: [0x51,0xee,0x00,0x09] + + vcmp.f16 s0, s1 +@ ARM: vcmp.f16 s0, s1 @ encoding: [0x60,0x09,0xb4,0xee] +@ THUMB: vcmp.f16 s0, s1 @ encoding: [0xb4,0xee,0x60,0x09] + + vcmp.f16 s2, #0 +@ ARM: vcmp.f16 s2, #0 @ encoding: [0x40,0x19,0xb5,0xee] +@ THUMB: vcmp.f16 s2, #0 @ encoding: [0xb5,0xee,0x40,0x19] + + vcmpe.f16 s1, s0 +@ ARM: vcmpe.f16 s1, s0 @ encoding: [0xc0,0x09,0xf4,0xee] +@ THUMB: vcmpe.f16 s1, s0 @ encoding: [0xf4,0xee,0xc0,0x09] + + vcmpe.f16 s0, #0 +@ ARM: vcmpe.f16 s0, #0 @ encoding: [0xc0,0x09,0xb5,0xee] +@ THUMB: vcmpe.f16 s0, 
#0 @ encoding: [0xb5,0xee,0xc0,0x09] + + vabs.f16 s0, s0 +@ ARM: vabs.f16 s0, s0 @ encoding: [0xc0,0x09,0xb0,0xee] +@ THUMB: vabs.f16 s0, s0 @ encoding: [0xb0,0xee,0xc0,0x09] + + vneg.f16 s0, s0 +@ ARM: vneg.f16 s0, s0 @ encoding: [0x40,0x09,0xb1,0xee] +@ THUMB: vneg.f16 s0, s0 @ encoding: [0xb1,0xee,0x40,0x09] + + vsqrt.f16 s0, s0 +@ ARM: vsqrt.f16 s0, s0 @ encoding: [0xc0,0x09,0xb1,0xee] +@ THUMB: vsqrt.f16 s0, s0 @ encoding: [0xb1,0xee,0xc0,0x09] + + vcvt.f16.s32 s0, s0 + vcvt.f16.u32 s0, s0 + vcvt.s32.f16 s0, s0 + vcvt.u32.f16 s0, s0 +@ ARM: vcvt.f16.s32 s0, s0 @ encoding: [0xc0,0x09,0xb8,0xee] +@ ARM: vcvt.f16.u32 s0, s0 @ encoding: [0x40,0x09,0xb8,0xee] +@ ARM: vcvt.s32.f16 s0, s0 @ encoding: [0xc0,0x09,0xbd,0xee] +@ ARM: vcvt.u32.f16 s0, s0 @ encoding: [0xc0,0x09,0xbc,0xee] +@ THUMB: vcvt.f16.s32 s0, s0 @ encoding: [0xb8,0xee,0xc0,0x09] +@ THUMB: vcvt.f16.u32 s0, s0 @ encoding: [0xb8,0xee,0x40,0x09] +@ THUMB: vcvt.s32.f16 s0, s0 @ encoding: [0xbd,0xee,0xc0,0x09] +@ THUMB: vcvt.u32.f16 s0, s0 @ encoding: [0xbc,0xee,0xc0,0x09] + + vcvtr.s32.f16 s0, s1 + vcvtr.u32.f16 s0, s1 +@ ARM: vcvtr.s32.f16 s0, s1 @ encoding: [0x60,0x09,0xbd,0xee] +@ ARM: vcvtr.u32.f16 s0, s1 @ encoding: [0x60,0x09,0xbc,0xee] +@ THUMB: vcvtr.s32.f16 s0, s1 @ encoding: [0xbd,0xee,0x60,0x09] +@ THUMB: vcvtr.u32.f16 s0, s1 @ encoding: [0xbc,0xee,0x60,0x09] + + vcvt.f16.u32 s0, s0, #20 + vcvt.f16.u16 s0, s0, #1 + vcvt.f16.s32 s1, s1, #20 + vcvt.f16.s16 s17, s17, #1 + vcvt.u32.f16 s12, s12, #20 + vcvt.u16.f16 s28, s28, #1 + vcvt.s32.f16 s1, s1, #20 + vcvt.s16.f16 s17, s17, #1 +@ ARM: vcvt.f16.u32 s0, s0, #20 @ encoding: [0xc6,0x09,0xbb,0xee] +@ ARM: vcvt.f16.u16 s0, s0, #1 @ encoding: [0x67,0x09,0xbb,0xee] +@ ARM: vcvt.f16.s32 s1, s1, #20 @ encoding: [0xc6,0x09,0xfa,0xee] +@ ARM: vcvt.f16.s16 s17, s17, #1 @ encoding: [0x67,0x89,0xfa,0xee] +@ ARM: vcvt.u32.f16 s12, s12, #20 @ encoding: [0xc6,0x69,0xbf,0xee] +@ ARM: vcvt.u16.f16 s28, s28, #1 @ encoding: [0x67,0xe9,0xbf,0xee] +@ ARM: vcvt.s32.f16 
s1, s1, #20 @ encoding: [0xc6,0x09,0xfe,0xee] +@ ARM: vcvt.s16.f16 s17, s17, #1 @ encoding: [0x67,0x89,0xfe,0xee] +@ THUMB: vcvt.f16.u32 s0, s0, #20 @ encoding: [0xbb,0xee,0xc6,0x09] +@ THUMB: vcvt.f16.u16 s0, s0, #1 @ encoding: [0xbb,0xee,0x67,0x09] +@ THUMB: vcvt.f16.s32 s1, s1, #20 @ encoding: [0xfa,0xee,0xc6,0x09] +@ THUMB: vcvt.f16.s16 s17, s17, #1 @ encoding: [0xfa,0xee,0x67,0x89] +@ THUMB: vcvt.u32.f16 s12, s12, #20 @ encoding: [0xbf,0xee,0xc6,0x69] +@ THUMB: vcvt.u16.f16 s28, s28, #1 @ encoding: [0xbf,0xee,0x67,0xe9] +@ THUMB: vcvt.s32.f16 s1, s1, #20 @ encoding: [0xfe,0xee,0xc6,0x09] +@ THUMB: vcvt.s16.f16 s17, s17, #1 @ encoding: [0xfe,0xee,0x67,0x89] + + vcvta.s32.f16 s2, s3 +@ ARM: vcvta.s32.f16 s2, s3 @ encoding: [0xe1,0x19,0xbc,0xfe] +@ THUMB: vcvta.s32.f16 s2, s3 @ encoding: [0xbc,0xfe,0xe1,0x19] + + vcvtn.s32.f16 s6, s23 +@ ARM: vcvtn.s32.f16 s6, s23 @ encoding: [0xeb,0x39,0xbd,0xfe] +@ THUMB: vcvtn.s32.f16 s6, s23 @ encoding: [0xbd,0xfe,0xeb,0x39] + + vcvtp.s32.f16 s0, s4 +@ ARM: vcvtp.s32.f16 s0, s4 @ encoding: [0xc2,0x09,0xbe,0xfe] +@ THUMB: vcvtp.s32.f16 s0, s4 @ encoding: [0xbe,0xfe,0xc2,0x09] + + vcvtm.s32.f16 s17, s8 +@ ARM: vcvtm.s32.f16 s17, s8 @ encoding: [0xc4,0x89,0xff,0xfe] +@ THUMB: vcvtm.s32.f16 s17, s8 @ encoding: [0xff,0xfe,0xc4,0x89] + + vcvta.u32.f16 s2, s3 +@ ARM: vcvta.u32.f16 s2, s3 @ encoding: [0x61,0x19,0xbc,0xfe] +@ THUMB: vcvta.u32.f16 s2, s3 @ encoding: [0xbc,0xfe,0x61,0x19] + + vcvtn.u32.f16 s6, s23 +@ ARM: vcvtn.u32.f16 s6, s23 @ encoding: [0x6b,0x39,0xbd,0xfe] +@ THUMB: vcvtn.u32.f16 s6, s23 @ encoding: [0xbd,0xfe,0x6b,0x39] + + vcvtp.u32.f16 s0, s4 +@ ARM: vcvtp.u32.f16 s0, s4 @ encoding: [0x42,0x09,0xbe,0xfe] +@ THUMB: vcvtp.u32.f16 s0, s4 @ encoding: [0xbe,0xfe,0x42,0x09] + + vcvtm.u32.f16 s17, s8 +@ ARM: vcvtm.u32.f16 s17, s8 @ encoding: [0x44,0x89,0xff,0xfe] +@ THUMB: vcvtm.u32.f16 s17, s8 @ encoding: [0xff,0xfe,0x44,0x89] + + vselge.f16 s4, s1, s23 +@ ARM: vselge.f16 s4, s1, s23 @ encoding: [0xab,0x29,0x20,0xfe] 
+@ THUMB: vselge.f16 s4, s1, s23 @ encoding: [0x20,0xfe,0xab,0x29] + + vselgt.f16 s0, s1, s0 +@ ARM: vselgt.f16 s0, s1, s0 @ encoding: [0x80,0x09,0x30,0xfe] +@ THUMB: vselgt.f16 s0, s1, s0 @ encoding: [0x30,0xfe,0x80,0x09] + + vseleq.f16 s30, s28, s23 +@ ARM: vseleq.f16 s30, s28, s23 @ encoding: [0x2b,0xf9,0x0e,0xfe] +@ THUMB: vseleq.f16 s30, s28, s23 @ encoding: [0x0e,0xfe,0x2b,0xf9] + + vselvs.f16 s21, s16, s14 +@ ARM: vselvs.f16 s21, s16, s14 @ encoding: [0x07,0xa9,0x58,0xfe] +@ THUMB: vselvs.f16 s21, s16, s14 @ encoding: [0x58,0xfe,0x07,0xa9] + + vmaxnm.f16 s5, s12, s0 +@ ARM: vmaxnm.f16 s5, s12, s0 @ encoding: [0x00,0x29,0xc6,0xfe] +@ THUMB: vmaxnm.f16 s5, s12, s0 @ encoding: [0xc6,0xfe,0x00,0x29] + + vminnm.f16 s0, s0, s12 +@ ARM: vminnm.f16 s0, s0, s12 @ encoding: [0x46,0x09,0x80,0xfe] +@ THUMB: vminnm.f16 s0, s0, s12 @ encoding: [0x80,0xfe,0x46,0x09] + + vrintz.f16 s3, s24 +@ ARM: vrintz.f16 s3, s24 @ encoding: [0xcc,0x19,0xf6,0xee] +@ THUMB: vrintz.f16 s3, s24 @ encoding: [0xf6,0xee,0xcc,0x19] + + vrintr.f16 s0, s9 +@ ARM: vrintr.f16 s0, s9 @ encoding: [0x64,0x09,0xb6,0xee] +@ THUMB: vrintr.f16 s0, s9 @ encoding: [0xb6,0xee,0x64,0x09] + + vrintx.f16 s10, s14 +@ ARM: vrintx.f16 s10, s14 @ encoding: [0x47,0x59,0xb7,0xee] +@ THUMB: vrintx.f16 s10, s14 @ encoding: [0xb7,0xee,0x47,0x59] + + vrinta.f16 s12, s1 +@ ARM: vrinta.f16 s12, s1 @ encoding: [0x60,0x69,0xb8,0xfe] +@ THUMB: vrinta.f16 s12, s1 @ encoding: [0xb8,0xfe,0x60,0x69] + + vrintn.f16 s12, s1 +@ ARM: vrintn.f16 s12, s1 @ encoding: [0x60,0x69,0xb9,0xfe] +@ THUMB: vrintn.f16 s12, s1 @ encoding: [0xb9,0xfe,0x60,0x69] + + vrintp.f16 s12, s1 +@ ARM: vrintp.f16 s12, s1 @ encoding: [0x60,0x69,0xba,0xfe] +@ THUMB: vrintp.f16 s12, s1 @ encoding: [0xba,0xfe,0x60,0x69] + + vrintm.f16 s12, s1 +@ ARM: vrintm.f16 s12, s1 @ encoding: [0x60,0x69,0xbb,0xfe] +@ THUMB: vrintm.f16 s12, s1 @ encoding: [0xbb,0xfe,0x60,0x69] + + vfma.f16 s2, s7, s4 +@ ARM: vfma.f16 s2, s7, s4 @ encoding: [0x82,0x19,0xa3,0xee] +@ THUMB: 
vfma.f16 s2, s7, s4 @ encoding: [0xa3,0xee,0x82,0x19] + + vfms.f16 s2, s7, s4 +@ ARM: vfms.f16 s2, s7, s4 @ encoding: [0xc2,0x19,0xa3,0xee] +@ THUMB: vfms.f16 s2, s7, s4 @ encoding: [0xa3,0xee,0xc2,0x19] + + vfnma.f16 s2, s7, s4 +@ ARM: vfnma.f16 s2, s7, s4 @ encoding: [0xc2,0x19,0x93,0xee] +@ THUMB: vfnma.f16 s2, s7, s4 @ encoding: [0x93,0xee,0xc2,0x19] + + vfnms.f16 s2, s7, s4 +@ ARM: vfnms.f16 s2, s7, s4 @ encoding: [0x82,0x19,0x93,0xee] +@ THUMB: vfnms.f16 s2, s7, s4 @ encoding: [0x93,0xee,0x82,0x19] + + vmovx.f16 s2, s5 + vins.f16 s2, s5 +@ ARM: vmovx.f16 s2, s5 @ encoding: [0x62,0x1a,0xb0,0xfe] +@ ARM: vins.f16 s2, s5 @ encoding: [0xe2,0x1a,0xb0,0xfe] +@ THUMB: vmovx.f16 s2, s5 @ encoding: [0xb0,0xfe,0x62,0x1a] +@ THUMB: vins.f16 s2, s5 @ encoding: [0xb0,0xfe,0xe2,0x1a] + + + vldr.16 s1, [pc, #6] + vldr.16 s2, [pc, #510] + vldr.16 s3, [pc, #-510] + vldr.16 s4, [r4, #-18] +@ ARM: vldr.16 s1, [pc, #6] @ encoding: [0x03,0x09,0xdf,0xed] +@ ARM: vldr.16 s2, [pc, #510] @ encoding: [0xff,0x19,0x9f,0xed] +@ ARM: vldr.16 s3, [pc, #-510] @ encoding: [0xff,0x19,0x5f,0xed] +@ ARM: vldr.16 s4, [r4, #-18] @ encoding: [0x09,0x29,0x14,0xed] +@ THUMB: vldr.16 s1, [pc, #6] @ encoding: [0xdf,0xed,0x03,0x09] +@ THUMB: vldr.16 s2, [pc, #510] @ encoding: [0x9f,0xed,0xff,0x19] +@ THUMB: vldr.16 s3, [pc, #-510] @ encoding: [0x5f,0xed,0xff,0x19] +@ THUMB: vldr.16 s4, [r4, #-18] @ encoding: [0x14,0xed,0x09,0x29] + + + vstr.16 s1, [pc, #6] + vstr.16 s2, [pc, #510] + vstr.16 s3, [pc, #-510] + vstr.16 s4, [r4, #-18] +@ ARM: vstr.16 s1, [pc, #6] @ encoding: [0x03,0x09,0xcf,0xed] +@ ARM: vstr.16 s2, [pc, #510] @ encoding: [0xff,0x19,0x8f,0xed] +@ ARM: vstr.16 s3, [pc, #-510] @ encoding: [0xff,0x19,0x4f,0xed] +@ ARM: vstr.16 s4, [r4, #-18] @ encoding: [0x09,0x29,0x04,0xed] +@ THUMB: vstr.16 s1, [pc, #6] @ encoding: [0xcf,0xed,0x03,0x09] +@ THUMB: vstr.16 s2, [pc, #510] @ encoding: [0x8f,0xed,0xff,0x19] +@ THUMB: vstr.16 s3, [pc, #-510] @ encoding: [0x4f,0xed,0xff,0x19] +@ THUMB: vstr.16 s4, 
[r4, #-18] @ encoding: [0x04,0xed,0x09,0x29] + + vmov.f16 s0, #1.0 +@ ARM: vmov.f16 s0, #1.000000e+00 @ encoding: [0x00,0x09,0xb7,0xee] +@ THUMB: vmov.f16 s0, #1.000000e+00 @ encoding: [0xb7,0xee,0x00,0x09] + + vmov.f16 s1, r2 + vmov.f16 r3, s4 +@ ARM: vmov.f16 s1, r2 @ encoding: [0x90,0x29,0x00,0xee] +@ ARM: vmov.f16 r3, s4 @ encoding: [0x10,0x39,0x12,0xee] +@ THUMB: vmov.f16 s1, r2 @ encoding: [0x00,0xee,0x90,0x29] +@ THUMB: vmov.f16 r3, s4 @ encoding: [0x12,0xee,0x10,0x39] diff --git a/test/MC/Disassembler/ARM/fullfp16-arm-neg.txt b/test/MC/Disassembler/ARM/fullfp16-arm-neg.txt new file mode 100644 index 00000000000..cd26f09a4e9 --- /dev/null +++ b/test/MC/Disassembler/ARM/fullfp16-arm-neg.txt @@ -0,0 +1,188 @@ +# RUN: not llvm-mc -disassemble -triple armv8a-none-eabi -mattr=-fullfp16 -show-encoding < %s 2>&1 | FileCheck %s + +# CHECK: warning: invalid instruction encoding +[0x80,0x09,0x30,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0x30,0xee] + +# CHECK: warning: invalid instruction encoding +[0x80,0x09,0x80,0xee] + +# CHECK: warning: invalid instruction encoding +[0x80,0x09,0x20,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0x20,0xee] + +# CHECK: warning: invalid instruction encoding +[0x00,0x09,0x41,0xee] + +# CHECK: warning: invalid instruction encoding +[0x40,0x09,0x41,0xee] + +# CHECK: warning: invalid instruction encoding +[0x40,0x09,0x51,0xee] + +# CHECK: warning: invalid instruction encoding +[0x00,0x09,0x51,0xee] + +# CHECK: warning: invalid instruction encoding +[0x60,0x09,0xb4,0xee] + +# CHECK: warning: invalid instruction encoding +[0x40,0x19,0xb5,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0xf4,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0xb5,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0xb0,0xee] + +# CHECK: warning: invalid instruction encoding +[0x40,0x09,0xb1,0xee] + +# CHECK: warning: invalid instruction encoding 
+[0xc0,0x09,0xb1,0xee] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xc0,0x09,0xb8,0xee] +[0x40,0x09,0xb8,0xee] +[0xc0,0x09,0xbd,0xee] +[0xc0,0x09,0xbc,0xee] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x60,0x09,0xbd,0xee] +[0x60,0x09,0xbc,0xee] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xc6,0x09,0xbb,0xee] +[0x67,0x09,0xbb,0xee] +[0xc6,0x09,0xfa,0xee] +[0x67,0x89,0xfa,0xee] +[0xc6,0x69,0xbf,0xee] +[0x67,0xe9,0xbf,0xee] +[0xc6,0x09,0xfe,0xee] +[0x67,0x89,0xfe,0xee] + +# CHECK: warning: invalid instruction encoding +[0xe1,0x19,0xbc,0xfe] + +# CHECK: warning: invalid instruction encoding +[0xeb,0x39,0xbd,0xfe] + +# CHECK: warning: invalid instruction encoding +[0xc2,0x09,0xbe,0xfe] + +# CHECK: warning: invalid instruction encoding +[0xc4,0x89,0xff,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x61,0x19,0xbc,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x6b,0x39,0xbd,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x42,0x09,0xbe,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x44,0x89,0xff,0xfe] + +# CHECK: warning: invalid instruction encoding +[0xab,0x29,0x20,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x80,0x09,0x30,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x2b,0xf9,0x0e,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x07,0xa9,0x58,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x00,0x29,0xc6,0xfe] + +# CHECK: warning: 
invalid instruction encoding +[0x46,0x09,0x80,0xfe] + +# CHECK: warning: invalid instruction encoding +[0xcc,0x19,0xf6,0xee] + +# CHECK: warning: invalid instruction encoding +[0x64,0x09,0xb6,0xee] + +# CHECK: warning: invalid instruction encoding +[0x47,0x59,0xb7,0xee] + +# CHECK: warning: invalid instruction encoding +[0x60,0x69,0xb8,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x60,0x69,0xb9,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x60,0x69,0xba,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x60,0x69,0xbb,0xfe] + +# CHECK: warning: invalid instruction encoding +[0x82,0x19,0xa3,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc2,0x19,0xa3,0xee] + +# CHECK: warning: invalid instruction encoding +[0xc2,0x19,0x93,0xee] + +# CHECK: warning: invalid instruction encoding +[0x82,0x19,0x93,0xee] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x62,0x1a,0xb0,0xfe] +[0xe2,0x1a,0xb0,0xfe] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x03,0x09,0xdf,0xed] +[0xff,0x19,0x9f,0xed] +[0xff,0x19,0x5f,0xed] +[0x09,0x29,0x14,0xed] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x03,0x09,0xcf,0xed] +[0xff,0x19,0x8f,0xed] +[0xff,0x19,0x4f,0xed] +[0x09,0x29,0x04,0xed] + +# CHECK: warning: invalid instruction encoding +[0x00,0x09,0xb7,0xee] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x90,0x29,0x00,0xee] +[0x10,0x39,0x12,0xee] + +# CHECK-NOT: warning: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/fullfp16-arm.txt b/test/MC/Disassembler/ARM/fullfp16-arm.txt new file mode 100644 index 00000000000..8a7ce68933e --- 
/dev/null +++ b/test/MC/Disassembler/ARM/fullfp16-arm.txt @@ -0,0 +1,186 @@ +# RUN: llvm-mc -disassemble -triple armv8a-none-eabi -mattr=+fullfp16 -show-encoding < %s | FileCheck %s + +# CHECK: vadd.f16 s0, s1, s0 +[0x80,0x09,0x30,0xee] + +# CHECK: vsub.f16 s0, s1, s0 +[0xc0,0x09,0x30,0xee] + +# CHECK: vdiv.f16 s0, s1, s0 +[0x80,0x09,0x80,0xee] + +# CHECK: vmul.f16 s0, s1, s0 +[0x80,0x09,0x20,0xee] + +# CHECK: vnmul.f16 s0, s1, s0 +[0xc0,0x09,0x20,0xee] + +# CHECK: vmla.f16 s1, s2, s0 +[0x00,0x09,0x41,0xee] + +# CHECK: vmls.f16 s1, s2, s0 +[0x40,0x09,0x41,0xee] + +# CHECK: vnmla.f16 s1, s2, s0 +[0x40,0x09,0x51,0xee] + +# CHECK: vnmls.f16 s1, s2, s0 +[0x00,0x09,0x51,0xee] + +# CHECK: vcmp.f16 s0, s1 +[0x60,0x09,0xb4,0xee] + +# CHECK: vcmp.f16 s2, #0 +[0x40,0x19,0xb5,0xee] + +# CHECK: vcmpe.f16 s1, s0 +[0xc0,0x09,0xf4,0xee] + +# CHECK: vcmpe.f16 s0, #0 +[0xc0,0x09,0xb5,0xee] + +# CHECK: vabs.f16 s0, s0 +[0xc0,0x09,0xb0,0xee] + +# CHECK: vneg.f16 s0, s0 +[0x40,0x09,0xb1,0xee] + +# CHECK: vsqrt.f16 s0, s0 +[0xc0,0x09,0xb1,0xee] + +# CHECK: vcvt.f16.s32 s0, s0 +# CHECK: vcvt.f16.u32 s0, s0 +# CHECK: vcvt.s32.f16 s0, s0 +# CHECK: vcvt.u32.f16 s0, s0 +[0xc0,0x09,0xb8,0xee] +[0x40,0x09,0xb8,0xee] +[0xc0,0x09,0xbd,0xee] +[0xc0,0x09,0xbc,0xee] + +# CHECK: vcvtr.s32.f16 s0, s1 +# CHECK: vcvtr.u32.f16 s0, s1 +[0x60,0x09,0xbd,0xee] +[0x60,0x09,0xbc,0xee] + +# CHECK: vcvt.f16.u32 s0, s0, #20 +# CHECK: vcvt.f16.u16 s0, s0, #1 +# CHECK: vcvt.f16.s32 s1, s1, #20 +# CHECK: vcvt.f16.s16 s17, s17, #1 +# CHECK: vcvt.u32.f16 s12, s12, #20 +# CHECK: vcvt.u16.f16 s28, s28, #1 +# CHECK: vcvt.s32.f16 s1, s1, #20 +# CHECK: vcvt.s16.f16 s17, s17, #1 +[0xc6,0x09,0xbb,0xee] +[0x67,0x09,0xbb,0xee] +[0xc6,0x09,0xfa,0xee] +[0x67,0x89,0xfa,0xee] +[0xc6,0x69,0xbf,0xee] +[0x67,0xe9,0xbf,0xee] +[0xc6,0x09,0xfe,0xee] +[0x67,0x89,0xfe,0xee] + +# CHECK: vcvta.s32.f16 s2, s3 +[0xe1,0x19,0xbc,0xfe] + +# CHECK: vcvtn.s32.f16 s6, s23 +[0xeb,0x39,0xbd,0xfe] + +# CHECK: vcvtp.s32.f16 s0, s4 
+[0xc2,0x09,0xbe,0xfe] + +# CHECK: vcvtm.s32.f16 s17, s8 +[0xc4,0x89,0xff,0xfe] + +# CHECK: vcvta.u32.f16 s2, s3 +[0x61,0x19,0xbc,0xfe] + +# CHECK: vcvtn.u32.f16 s6, s23 +[0x6b,0x39,0xbd,0xfe] + +# CHECK: vcvtp.u32.f16 s0, s4 +[0x42,0x09,0xbe,0xfe] + +# CHECK: vcvtm.u32.f16 s17, s8 +[0x44,0x89,0xff,0xfe] + +# CHECK: vselge.f16 s4, s1, s23 +[0xab,0x29,0x20,0xfe] + +# CHECK: vselgt.f16 s0, s1, s0 +[0x80,0x09,0x30,0xfe] + +# CHECK: vseleq.f16 s30, s28, s23 +[0x2b,0xf9,0x0e,0xfe] + +# CHECK: vselvs.f16 s21, s16, s14 +[0x07,0xa9,0x58,0xfe] + +# CHECK: vmaxnm.f16 s5, s12, s0 +[0x00,0x29,0xc6,0xfe] + +# CHECK: vminnm.f16 s0, s0, s12 +[0x46,0x09,0x80,0xfe] + +# CHECK: vrintz.f16 s3, s24 +[0xcc,0x19,0xf6,0xee] + +# CHECK: vrintr.f16 s0, s9 +[0x64,0x09,0xb6,0xee] + +# CHECK: vrintx.f16 s10, s14 +[0x47,0x59,0xb7,0xee] + +# CHECK: vrinta.f16 s12, s1 +[0x60,0x69,0xb8,0xfe] + +# CHECK: vrintn.f16 s12, s1 +[0x60,0x69,0xb9,0xfe] + +# CHECK: vrintp.f16 s12, s1 +[0x60,0x69,0xba,0xfe] + +# CHECK: vrintm.f16 s12, s1 +[0x60,0x69,0xbb,0xfe] + +# CHECK: vfma.f16 s2, s7, s4 +[0x82,0x19,0xa3,0xee] + +# CHECK: vfms.f16 s2, s7, s4 +[0xc2,0x19,0xa3,0xee] + +# CHECK: vfnma.f16 s2, s7, s4 +[0xc2,0x19,0x93,0xee] + +# CHECK: vfnms.f16 s2, s7, s4 +[0x82,0x19,0x93,0xee] + +# CHECK: vmovx.f16 s2, s5 +# CHECK: vins.f16 s2, s5 +[0x62,0x1a,0xb0,0xfe] +[0xe2,0x1a,0xb0,0xfe] + +# CHECK: vldr.16 s1, [pc, #6] +# CHECK: vldr.16 s2, [pc, #510] +# CHECK: vldr.16 s3, [pc, #-510] +# CHECK: vldr.16 s4, [r4, #-18] +[0x03,0x09,0xdf,0xed] +[0xff,0x19,0x9f,0xed] +[0xff,0x19,0x5f,0xed] +[0x09,0x29,0x14,0xed] + +# CHECK: vstr.16 s1, [pc, #6] +# CHECK: vstr.16 s2, [pc, #510] +# CHECK: vstr.16 s3, [pc, #-510] +# CHECK: vstr.16 s4, [r4, #-18] +[0x03,0x09,0xcf,0xed] +[0xff,0x19,0x8f,0xed] +[0xff,0x19,0x4f,0xed] +[0x09,0x29,0x04,0xed] + +# CHECK: vmov.f16 s0, #1.0 +[0x00,0x09,0xb7,0xee] + +# CHECK: vmov.f16 s1, r2 +# CHECK: vmov.f16 r3, s4 +[0x90,0x29,0x00,0xee] +[0x10,0x39,0x12,0xee] diff --git 
a/test/MC/Disassembler/ARM/fullfp16-thumb-neg.txt b/test/MC/Disassembler/ARM/fullfp16-thumb-neg.txt new file mode 100644 index 00000000000..ecb8fabd4ca --- /dev/null +++ b/test/MC/Disassembler/ARM/fullfp16-thumb-neg.txt @@ -0,0 +1,186 @@ +# RUN: not llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=-fullfp16,+thumb-mode -show-encoding < %s 2>&1 | FileCheck %s + +# CHECK: warning: invalid instruction encoding +[0x30,0xee,0x80,0x09] + +# CHECK: warning: invalid instruction encoding +[0x30,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +[0x80,0xee,0x80,0x09] + +# CHECK: warning: invalid instruction encoding +[0x20,0xee,0x80,0x09] + +# CHECK: warning: invalid instruction encoding +[0x20,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +[0x41,0xee,0x00,0x09] + +# CHECK: warning: invalid instruction encoding +[0x41,0xee,0x40,0x09] + +# CHECK: warning: invalid instruction encoding +[0x51,0xee,0x40,0x09] + +# CHECK: warning: invalid instruction encoding +[0x51,0xee,0x00,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb4,0xee,0x60,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb5,0xee,0x40,0x19] + +# CHECK: warning: invalid instruction encoding +[0xf4,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb5,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb0,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb1,0xee,0x40,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb1,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xb8,0xee,0xc0,0x09] +[0xb8,0xee,0x40,0x09] +[0xbd,0xee,0xc0,0x09] +[0xbc,0xee,0xc0,0x09] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xbd,0xee,0x60,0x09] +[0xbc,0xee,0x60,0x09] + +# CHECK: warning: invalid 
instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xbb,0xee,0xc6,0x09] +[0xbb,0xee,0x67,0x09] +[0xfa,0xee,0xc6,0x09] +[0xfa,0xee,0x67,0x89] +[0xbf,0xee,0xc6,0x69] +[0xbf,0xee,0x67,0xe9] +[0xfe,0xee,0xc6,0x09] +[0xfe,0xee,0x67,0x89] + +# CHECK: warning: invalid instruction encoding +[0xbc,0xfe,0xe1,0x19] + +# CHECK: warning: invalid instruction encoding +[0xbd,0xfe,0xeb,0x39] + +# CHECK: warning: invalid instruction encoding +[0xbe,0xfe,0xc2,0x09] + +# CHECK: warning: invalid instruction encoding +[0xff,0xfe,0xc4,0x89] + +# CHECK: warning: invalid instruction encoding +[0xbc,0xfe,0x61,0x19] + +# CHECK: warning: invalid instruction encoding +[0xbd,0xfe,0x6b,0x39] + +# CHECK: warning: invalid instruction encoding +[0xbe,0xfe,0x42,0x09] + +# CHECK: warning: invalid instruction encoding +[0xff,0xfe,0x44,0x89] + +# CHECK: warning: invalid instruction encoding +[0x20,0xfe,0xab,0x29] + +# CHECK: warning: invalid instruction encoding +[0x30,0xfe,0x80,0x09] + +# CHECK: warning: invalid instruction encoding +[0x0e,0xfe,0x2b,0xf9] + +# CHECK: warning: invalid instruction encoding +[0x58,0xfe,0x07,0xa9] + +# CHECK: warning: invalid instruction encoding +[0xc6,0xfe,0x00,0x29] + +# CHECK: warning: invalid instruction encoding +[0x80,0xfe,0x46,0x09] + +# CHECK: warning: invalid instruction encoding +[0xf6,0xee,0xcc,0x19] + +# CHECK: warning: invalid instruction encoding +[0xb6,0xee,0x64,0x09] + +# CHECK: warning: invalid instruction encoding +[0xb7,0xee,0x47,0x59] + +# CHECK: warning: invalid instruction encoding +[0xb8,0xfe,0x60,0x69] + +# CHECK: warning: invalid instruction encoding +[0xb9,0xfe,0x60,0x69] + +# CHECK: warning: invalid instruction encoding 
+[0xba,0xfe,0x60,0x69] + +# CHECK: warning: invalid instruction encoding +[0xbb,0xfe,0x60,0x69] + +# CHECK: warning: invalid instruction encoding +[0xa3,0xee,0x82,0x19] + +# CHECK: warning: invalid instruction encoding +[0xa3,0xee,0xc2,0x19] + +# CHECK: warning: invalid instruction encoding +[0x93,0xee,0xc2,0x19] + +# CHECK: warning: invalid instruction encoding +[0x93,0xee,0x82,0x19] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xb0,0xfe,0x62,0x1a] +[0xb0,0xfe,0xe2,0x1a] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xdf,0xed,0x03,0x09] +[0x9f,0xed,0xff,0x19] +[0x5f,0xed,0xff,0x19] +[0x14,0xed,0x09,0x29] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0xcf,0xed,0x03,0x09] +[0x8f,0xed,0xff,0x19] +[0x4f,0xed,0xff,0x19] +[0x04,0xed,0x09,0x29] + +# CHECK: warning: invalid instruction encoding +[0xb7,0xee,0x00,0x09] + +# CHECK: warning: invalid instruction encoding +# CHECK: warning: invalid instruction encoding +[0x00,0xee,0x90,0x29] +[0x12,0xee,0x10,0x39] diff --git a/test/MC/Disassembler/ARM/fullfp16-thumb.txt b/test/MC/Disassembler/ARM/fullfp16-thumb.txt new file mode 100644 index 00000000000..45117205143 --- /dev/null +++ b/test/MC/Disassembler/ARM/fullfp16-thumb.txt @@ -0,0 +1,186 @@ +# RUN: llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=+fullfp16,+thumb-mode -show-encoding < %s | FileCheck %s + +# CHECK: vadd.f16 s0, s1, s0 +[0x30,0xee,0x80,0x09] + +# CHECK: vsub.f16 s0, s1, s0 +[0x30,0xee,0xc0,0x09] + +# CHECK: vdiv.f16 s0, s1, s0 +[0x80,0xee,0x80,0x09] + +# CHECK: vmul.f16 s0, s1, s0 +[0x20,0xee,0x80,0x09] + +# CHECK: vnmul.f16 s0, s1, s0 +[0x20,0xee,0xc0,0x09] + +# CHECK: vmla.f16 s1, s2, s0 
+[0x41,0xee,0x00,0x09] + +# CHECK: vmls.f16 s1, s2, s0 +[0x41,0xee,0x40,0x09] + +# CHECK: vnmla.f16 s1, s2, s0 +[0x51,0xee,0x40,0x09] + +# CHECK: vnmls.f16 s1, s2, s0 +[0x51,0xee,0x00,0x09] + +# CHECK: vcmp.f16 s0, s1 +[0xb4,0xee,0x60,0x09] + +# CHECK: vcmp.f16 s2, #0 +[0xb5,0xee,0x40,0x19] + +# CHECK: vcmpe.f16 s1, s0 +[0xf4,0xee,0xc0,0x09] + +# CHECK: vcmpe.f16 s0, #0 +[0xb5,0xee,0xc0,0x09] + +# CHECK: vabs.f16 s0, s0 +[0xb0,0xee,0xc0,0x09] + +# CHECK: vneg.f16 s0, s0 +[0xb1,0xee,0x40,0x09] + +# CHECK: vsqrt.f16 s0, s0 +[0xb1,0xee,0xc0,0x09] + +# CHECK: vcvt.f16.s32 s0, s0 +# CHECK: vcvt.f16.u32 s0, s0 +# CHECK: vcvt.s32.f16 s0, s0 +# CHECK: vcvt.u32.f16 s0, s0 +[0xb8,0xee,0xc0,0x09] +[0xb8,0xee,0x40,0x09] +[0xbd,0xee,0xc0,0x09] +[0xbc,0xee,0xc0,0x09] + +# CHECK: vcvtr.s32.f16 s0, s1 +# CHECK: vcvtr.u32.f16 s0, s1 +[0xbd,0xee,0x60,0x09] +[0xbc,0xee,0x60,0x09] + +# CHECK: vcvt.f16.u32 s0, s0, #20 +# CHECK: vcvt.f16.u16 s0, s0, #1 +# CHECK: vcvt.f16.s32 s1, s1, #20 +# CHECK: vcvt.f16.s16 s17, s17, #1 +# CHECK: vcvt.u32.f16 s12, s12, #20 +# CHECK: vcvt.u16.f16 s28, s28, #1 +# CHECK: vcvt.s32.f16 s1, s1, #20 +# CHECK: vcvt.s16.f16 s17, s17, #1 +[0xbb,0xee,0xc6,0x09] +[0xbb,0xee,0x67,0x09] +[0xfa,0xee,0xc6,0x09] +[0xfa,0xee,0x67,0x89] +[0xbf,0xee,0xc6,0x69] +[0xbf,0xee,0x67,0xe9] +[0xfe,0xee,0xc6,0x09] +[0xfe,0xee,0x67,0x89] + +# CHECK: vcvta.s32.f16 s2, s3 +[0xbc,0xfe,0xe1,0x19] + +# CHECK: vcvtn.s32.f16 s6, s23 +[0xbd,0xfe,0xeb,0x39] + +# CHECK: vcvtp.s32.f16 s0, s4 +[0xbe,0xfe,0xc2,0x09] + +# CHECK: vcvtm.s32.f16 s17, s8 +[0xff,0xfe,0xc4,0x89] + +# CHECK: vcvta.u32.f16 s2, s3 +[0xbc,0xfe,0x61,0x19] + +# CHECK: vcvtn.u32.f16 s6, s23 +[0xbd,0xfe,0x6b,0x39] + +# CHECK: vcvtp.u32.f16 s0, s4 +[0xbe,0xfe,0x42,0x09] + +# CHECK: vcvtm.u32.f16 s17, s8 +[0xff,0xfe,0x44,0x89] + +# CHECK: vselge.f16 s4, s1, s23 +[0x20,0xfe,0xab,0x29] + +# CHECK: vselgt.f16 s0, s1, s0 +[0x30,0xfe,0x80,0x09] + +# CHECK: vseleq.f16 s30, s28, s23 +[0x0e,0xfe,0x2b,0xf9] + +# CHECK: vselvs.f16 s21, 
s16, s14 +[0x58,0xfe,0x07,0xa9] + +# CHECK: vmaxnm.f16 s5, s12, s0 +[0xc6,0xfe,0x00,0x29] + +# CHECK: vminnm.f16 s0, s0, s12 +[0x80,0xfe,0x46,0x09] + +# CHECK: vrintz.f16 s3, s24 +[0xf6,0xee,0xcc,0x19] + +# CHECK: vrintr.f16 s0, s9 +[0xb6,0xee,0x64,0x09] + +# CHECK: vrintx.f16 s10, s14 +[0xb7,0xee,0x47,0x59] + +# CHECK: vrinta.f16 s12, s1 +[0xb8,0xfe,0x60,0x69] + +# CHECK: vrintn.f16 s12, s1 +[0xb9,0xfe,0x60,0x69] + +# CHECK: vrintp.f16 s12, s1 +[0xba,0xfe,0x60,0x69] + +# CHECK: vrintm.f16 s12, s1 +[0xbb,0xfe,0x60,0x69] + +# CHECK: vfma.f16 s2, s7, s4 +[0xa3,0xee,0x82,0x19] + +# CHECK: vfms.f16 s2, s7, s4 +[0xa3,0xee,0xc2,0x19] + +# CHECK: vfnma.f16 s2, s7, s4 +[0x93,0xee,0xc2,0x19] + +# CHECK: vfnms.f16 s2, s7, s4 +[0x93,0xee,0x82,0x19] + +# CHECK: vmovx.f16 s2, s5 +# CHECK: vins.f16 s2, s5 +[0xb0,0xfe,0x62,0x1a] +[0xb0,0xfe,0xe2,0x1a] + +# CHECK: vldr.16 s1, [pc, #6] +# CHECK: vldr.16 s2, [pc, #510] +# CHECK: vldr.16 s3, [pc, #-510] +# CHECK: vldr.16 s4, [r4, #-18] +[0xdf,0xed,0x03,0x09] +[0x9f,0xed,0xff,0x19] +[0x5f,0xed,0xff,0x19] +[0x14,0xed,0x09,0x29] + +# CHECK: vstr.16 s1, [pc, #6] +# CHECK: vstr.16 s2, [pc, #510] +# CHECK: vstr.16 s3, [pc, #-510] +# CHECK: vstr.16 s4, [r4, #-18] +[0xcf,0xed,0x03,0x09] +[0x8f,0xed,0xff,0x19] +[0x4f,0xed,0xff,0x19] +[0x04,0xed,0x09,0x29] + +# CHECK: vmov.f16 s0, #1.0 +[0xb7,0xee,0x00,0x09] + +# CHECK: vmov.f16 s1, r2 +# CHECK: vmov.f16 r3, s4 +[0x00,0xee,0x90,0x29] +[0x12,0xee,0x10,0x39] -- 2.34.1