[ARM] Add ARMv8.2-A FP16 vector instructions
author     Oliver Stannard <oliver.stannard@arm.com>
           Wed, 16 Dec 2015 12:37:39 +0000 (12:37 +0000)
committer  Oliver Stannard <oliver.stannard@arm.com>
           Wed, 16 Dec 2015 12:37:39 +0000 (12:37 +0000)
ARMv8.2-A adds 16-bit floating-point versions of all existing SIMD
floating-point instructions. This is an optional extension, so all of
these instructions require the FeatureFullFP16 subtarget feature.

Note that VFP without SIMD is not a valid combination for any version of
ARMv8-A, but I have ensured that these instructions all depend on both
FeatureNEON and FeatureFullFP16 for consistency.
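
For example, with both features enabled the new instructions assemble as
expected (this invocation and encoding mirror test/MC/ARM/fullfp16-neon.s):

  $ echo "vadd.f16 d0, d1, d2" | \
      llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding
        vadd.f16        d0, d1, d2      @ encoding: [0x02,0x0d,0x11,0xf2]

With either feature disabled (e.g. -mattr=-fullfp16,+neon), the same input
is rejected with an "error: instruction requires:" diagnostic, as checked
in test/MC/ARM/fullfp16-neon-neg.s.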

Differential Revision: http://reviews.llvm.org/D15039

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255764 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/ARM/ARMInstrNEON.td
lib/Target/ARM/ARMRegisterInfo.td
lib/Target/ARM/AsmParser/ARMAsmParser.cpp
lib/Target/ARM/Disassembler/ARMDisassembler.cpp
test/MC/ARM/fullfp16-neon-neg.s [new file with mode: 0644]
test/MC/ARM/fullfp16-neon.s [new file with mode: 0644]
test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt [new file with mode: 0644]
test/MC/Disassembler/ARM/fullfp16-neon-arm.txt [new file with mode: 0644]
test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt [new file with mode: 0644]
test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt [new file with mode: 0644]

diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index d43535b4e67cbd9db2652f5ee43a9086d1b4e496..7020ffb41b643e8c25e7c69de6a9b7f75d587e72 100644
@@ -2460,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
         [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
 // Same as above, but not predicated.
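+// The first parameter (op19_18) is the encoding's size field: 0b10 selects
+// 32-bit (f32) and 0b01 selects 16-bit (f16) element types.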
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
           itin, OpcodeStr, Dt,
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
 
-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
           itin, OpcodeStr, Dt,
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
@@ -3250,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }
 
   // 128-bit vector types.
   def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3270,6 +3277,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }
 }
 
 
@@ -4105,6 +4119,12 @@ def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                      v2f32, v2f32, fadd, 1>;
 def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                      v4f32, v4f32, fadd, 1>;
+def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+                     v4f16, v4f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+                     v8f16, v8f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 //   VADDL    : Vector Add Long (Q = D + D)
 defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vaddl", "s", add, sext, 1>;
@@ -4160,10 +4180,21 @@ def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                      v2f32, v2f32, fmul, 1>;
 def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                      v4f32, v4f32, fmul, 1>;
+def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+                     v4f16, v4f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+                     v8f16, v8f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
 def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
 def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                        v2f32, fmul>;
+def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+                       v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;
 
 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4272,6 +4303,12 @@ def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
 def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                           v4f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+                          v4f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+                          v8f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
 def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4280,6 +4317,12 @@ def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
 def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                             v4f32, v2f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+                            v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+                            v8f16, v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
 
 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4490,6 +4533,12 @@ def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
 def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                           v4f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+                          v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+                          v8f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4498,6 +4547,12 @@ def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
 def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                             v4f32, v2f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+                            v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+                            v8f16, v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
 
 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4565,6 +4620,13 @@ def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
 def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                           v4f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+                          v4f16, fmul, fadd>,
+                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+                          v8f16, fmul, fadd>,
+                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
 
 //   Fused Vector Multiply Subtract (floating-point)
 def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4573,6 +4635,12 @@ def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
 def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                           v4f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+                          v4f16, fmul, fsub>,
+                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+                          v8f16, fmul, fsub>,
+                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
 
 // Match @llvm.fma.* intrinsics
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4597,6 +4665,12 @@ def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                      v2f32, v2f32, fsub, 0>;
 def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                      v4f32, v4f32, fsub, 0>;
+def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+                     v4f16, v4f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+                     v8f16, v8f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
 //   VSUBL    : Vector Subtract Long (Q = D - D)
 defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vsubl", "s", sub, sext, 0>;
@@ -4641,6 +4715,12 @@ def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      NEONvceq, 1>;
+def  VCEQhd   : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCEQhq   : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 let TwoOperandAliasConstraint = "$Vm = $Vd" in
 defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4655,6 +4735,12 @@ def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      NEONvcge, 0>;
+def  VCGEhd   : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGEhq   : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4672,6 +4758,12 @@ def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      NEONvcgt, 0>;
+def  VCGThd   : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGThq   : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4681,36 +4773,68 @@ defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
 }
 
 //   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def  VACGEfd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                         "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def  VACGEfq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                         "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def  VACGEhd   : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+                 Requires<[HasNEON, HasFullFP16]>;
+def  VACGEhq   : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+                 Requires<[HasNEON, HasFullFP16]>;
 //   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
-def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def  VACGTfd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                         "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def  VACGTfq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                         "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def  VACGThd   : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+                 Requires<[HasNEON, HasFullFP16]>;
+def  VACGThq   : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+                        "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+                 Requires<[HasNEON, HasFullFP16]>;
 //   VTST     : Vector Test Bits
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
 
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
 
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}
 
 // Vector Bitwise Operations.
 
@@ -5002,6 +5126,12 @@ def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                         "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
 def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                         "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
 defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5059,17 +5189,33 @@ def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
 def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmax", "f32",
                         v4f32, v4f32, fmaxnan, 1>;
+def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmax", "f16",
+                        v4f16, v4f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmax", "f16",
+                        v8f16, v8f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 // VMAXNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+  def VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
                             v2f32, v2f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+  def VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
                             v4f32, v4f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v4f16, v4f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v8f16, v8f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 //   VMIN     : Vector Minimum
@@ -5085,17 +5231,33 @@ def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
 def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmin", "f32",
                         v4f32, v4f32, fminnan, 1>;
+def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmin", "f16",
+                        v4f16, v4f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmin", "f16",
+                        v8f16, v8f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 // VMINNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+  def VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
                             v2f32, v2f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+  def VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
                             v4f32, v4f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v4f16, v4f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v8f16, v8f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 // Vector Pairwise Operations.
@@ -5113,6 +5275,10 @@ def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
 def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                         IIC_VPBIND, "vpadd", "f32",
                         v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+                        IIC_VPBIND, "vpadd", "f16",
+                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VPADDL   : Vector Pairwise Add Long
 defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5141,6 +5307,9 @@ def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
 def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                         "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VPMIN    : Vector Pairwise Minimum
 def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5157,6 +5326,9 @@ def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
 def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                         "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
 
@@ -5173,6 +5345,14 @@ def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
 def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAQ, "vrecpe", "f32",
                         v4f32, v4f32, int_arm_neon_vrecpe>;
+def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAD, "vrecpe", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAQ, "vrecpe", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VRECPS   : Vector Reciprocal Step
 def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5181,6 +5361,14 @@ def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
 def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrecps", "f32",
                         v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrecps", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrecps", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VRSQRTE  : Vector Reciprocal Square Root Estimate
 def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5195,6 +5383,14 @@ def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
 def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                          IIC_VUNAQ, "vrsqrte", "f32",
                          v4f32, v4f32, int_arm_neon_vrsqrte>;
+def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                         IIC_VUNAD, "vrsqrte", "f16",
+                         v4f16, v4f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                         IIC_VUNAQ, "vrsqrte", "f16",
+                         v8f16, v8f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 //   VRSQRTS  : Vector Reciprocal Square Root Step
 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5203,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrsqrts", "f16",
+                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrsqrts", "f16",
+                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 // Vector Shifts.
 
@@ -5354,6 +5558,14 @@ def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
 def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      "vabs", "f32",
                       v4f32, v4f32, fabs>;
+def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                     "vabs", "f16",
+                     v4f16, v4f16, fabs>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                     "vabs", "f16",
+                      v8f16, v8f16, fabs>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
                (v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5416,6 +5628,16 @@ def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                     "vneg", "f32", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+                    "vneg", "f16", "$Vd, $Vm", "",
+                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+                    "vneg", "f16", "$Vd, $Vm", "",
+                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+                Requires<[HasNEON, HasFullFP16]>;
 
 def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5886,18 +6108,56 @@ def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
 def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                      v4f32, v4i32, uint_to_fp>;
 
+def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v4i16, v4f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v4i16, v4f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v4f16, v4i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v4f16, v4i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
+def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v8i16, v8f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v8i16, v8f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v8f16, v8i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v8f16, v8i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
 // VCVT{A, N, P, M}
 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                     SDPatternOperator IntU> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
-    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                       "s16.f16", v4i16, v4f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                       "s16.f16", v8i16, v8f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                       "u16.f16", v4i16, v4f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                       "u16.f16", v8i16, v8f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
   }
 }
 
@@ -5916,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }
 
 let DecoderMethod = "DecodeVCVTQ" in {
@@ -5927,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }
 
 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5947,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                     (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
 
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
 
 //   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
 def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6200,22 +6498,40 @@ def  VTBX4Pseudo
 // VRINT      : Vector Rounding
 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
-    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
+    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                      !strconcat("vrint", op), "f16",
+                      v4f16, v4f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                      !strconcat("vrint", op), "f16",
+                      v8f16, v8f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
   }
 
   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
-                  (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
-                  (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+  }
 }
 
 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -7722,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                     (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                     (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                     (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7737,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                     (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                     (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 
 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
 // D-register versions.
@@ -7754,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                     (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                     (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
 def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                     (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7769,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                     (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                     (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
+                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 
 // VSWP allows, but does not require, a type suffix.
 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 7c0319a01ee71d58aadbc82ff618a78f26751e23..02cbfb1fa9f152c943492b67e9e38a6a3c606ef9 100644
@@ -288,7 +288,7 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
 // class.
 // ARM requires only word alignment for double. It's more performant if it
 // is double-word alignment though.
-def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
                         (sequence "D%u", 0, 31)> {
   // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
   // Darwin platforms.
@@ -301,16 +301,16 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
 
 // Subset of DPR that are accessible with VFP2 (and so that also have
 // 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
                              (trunc DPR, 16)>;
 
 // Subset of DPR which can be used as a source of NEON scalars for 16-bit
 // operations
-def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
                           (trunc DPR, 8)>;
 
 // Generic 128-bit vector register class.
-def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
                         (sequence "Q%u", 0, 15)> {
   // Allocate non-VFP2 aliases Q8-Q15 first.
   let AltOrders = [(rotl QPR, 8)];
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 72c98f01b38e3e5dd44008a327c556105c519736..8ba36f8289d221e214f6cd746a4596cee18deea1 100644
@@ -5643,9 +5643,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
   // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
   unsigned RegIdx = 3;
   if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
-      static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+      (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
+       static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
     if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
-        static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
+        (static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
+         static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
       RegIdx = 4;
 
     if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index a05111e4ceb856acc7798660d0862533df2b7541..bc63c26b328da40bdb6dc7c0671a676783977e9c 100644
@@ -5073,6 +5073,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
+  const FeatureBitset &featureBits =
+      ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+  bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
   unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
   Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
   unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5083,10 +5087,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
 
   DecodeStatus S = MCDisassembler::Success;
 
-  // VMOVv2f32 is ambiguous with these decodings.
-  if (!(imm & 0x38) && cmode == 0xF) {
-    if (op == 1) return MCDisassembler::Fail;
-    Inst.setOpcode(ARM::VMOVv2f32);
+  // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+  if (!(imm & 0x38)) {
+    if (cmode == 0xF) {
+      if (op == 1) return MCDisassembler::Fail;
+      Inst.setOpcode(ARM::VMOVv2f32);
+    }
+    if (hasFullFP16) {
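+      // When the f16 VCVT (fixed-point) encodings are available, cmode
+      // values 0xC-0xE here are also VMOV/VMVN (immediate) forms, so map
+      // them back to the corresponding modified-immediate opcodes.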
+      if (cmode == 0xE) {
+        if (op == 1) {
+          Inst.setOpcode(ARM::VMOVv1i64);
+        } else {
+          Inst.setOpcode(ARM::VMOVv8i8);
+        }
+      }
+      if (cmode == 0xD || cmode == 0xC) {
+        if (op == 1) {
+          Inst.setOpcode(ARM::VMVNv2i32);
+        } else {
+          Inst.setOpcode(ARM::VMOVv2i32);
+        }
+      }
+    }
     return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
   }
 
@@ -5103,6 +5132,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
+  const FeatureBitset &featureBits =
+      ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+  bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
   unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
   Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
   unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5113,10 +5146,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
 
   DecodeStatus S = MCDisassembler::Success;
 
-  // VMOVv4f32 is ambiguous with these decodings.
-  if (!(imm & 0x38) && cmode == 0xF) {
-    if (op == 1) return MCDisassembler::Fail;
-    Inst.setOpcode(ARM::VMOVv4f32);
+  // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+  if (!(imm & 0x38)) {
+    if (cmode == 0xF) {
+      if (op == 1) return MCDisassembler::Fail;
+      Inst.setOpcode(ARM::VMOVv4f32);
+    }
+    if (hasFullFP16) {
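+      // As in DecodeVCVTD: with the f16 fixed-point VCVT encodings
+      // available, cmode 0xC-0xE are VMOV/VMVN (immediate) forms on
+      // Q registers too.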
+      if (cmode == 0xE) {
+        if (op == 1) {
+          Inst.setOpcode(ARM::VMOVv2i64);
+        } else {
+          Inst.setOpcode(ARM::VMOVv16i8);
+        }
+      }
+      if (cmode == 0xD || cmode == 0xC) {
+        if (op == 1) {
+          Inst.setOpcode(ARM::VMVNv4i32);
+        } else {
+          Inst.setOpcode(ARM::VMOVv4i32);
+        }
+      }
+    }
     return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
   }
 
diff --git a/test/MC/ARM/fullfp16-neon-neg.s b/test/MC/ARM/fullfp16-neon-neg.s
new file mode 100644
index 0000000..1928163
--- /dev/null
@@ -0,0 +1,289 @@
+@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,-neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv8a-none-eabi -mattr=+fullfp16,-neon -show-encoding < %s 2>&1 | FileCheck %s
+
+  vadd.f16 d0, d1, d2
+  vadd.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vsub.f16 d0, d1, d2
+  vsub.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmul.f16 d0, d1, d2
+  vmul.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmul.f16 d1, d2, d3[2]
+  vmul.f16 q4, q5, d6[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmla.f16 d0, d1, d2
+  vmla.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmla.f16 d5, d6, d7[2]
+  vmla.f16 q5, q6, d7[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmls.f16 d0, d1, d2
+  vmls.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmls.f16 d5, d6, d7[2]
+  vmls.f16 q5, q6, d7[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vfma.f16 d0, d1, d2
+  vfma.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vfms.f16 d0, d1, d2
+  vfms.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vceq.f16 d2, d3, d4
+  vceq.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vceq.f16 d2, d3, #0
+  vceq.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcge.f16 d2, d3, d4
+  vcge.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcge.f16 d2, d3, #0
+  vcge.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcgt.f16 d2, d3, d4
+  vcgt.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcgt.f16 d2, d3, #0
+  vcgt.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcle.f16 d2, d3, d4
+  vcle.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcle.f16 d2, d3, #0
+  vcle.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vclt.f16 d2, d3, d4
+  vclt.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vclt.f16 d2, d3, #0
+  vclt.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vacge.f16 d0, d1, d2
+  vacge.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vacgt.f16 d0, d1, d2
+  vacgt.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vacle.f16 d0, d1, d2
+  vacle.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vaclt.f16 d0, d1, d2
+  vaclt.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vabd.f16 d0, d1, d2
+  vabd.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vabs.f16 d0, d1
+  vabs.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmax.f16 d0, d1, d2
+  vmax.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmin.f16 d0, d1, d2
+  vmin.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vmaxnm.f16 d0, d1, d2
+  vmaxnm.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vminnm.f16 d0, d1, d2
+  vminnm.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vpadd.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+  vpmax.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+  vpmin.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+  vrecpe.f16 d0, d1
+  vrecpe.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrecps.f16 d0, d1, d2
+  vrecps.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrsqrte.f16 d0, d1
+  vrsqrte.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrsqrts.f16 d0, d1, d2
+  vrsqrts.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vneg.f16 d0, d1
+  vneg.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvt.s16.f16 d0, d1
+  vcvt.u16.f16 d0, d1
+  vcvt.f16.s16 d0, d1
+  vcvt.f16.u16 d0, d1
+  vcvt.s16.f16 q0, q1
+  vcvt.u16.f16 q0, q1
+  vcvt.f16.s16 q0, q1
+  vcvt.f16.u16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvta.s16.f16 d0, d1
+  vcvta.s16.f16 q0, q1
+  vcvta.u16.f16 d0, d1
+  vcvta.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvtm.s16.f16 d0, d1
+  vcvtm.s16.f16 q0, q1
+  vcvtm.u16.f16 d0, d1
+  vcvtm.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvtn.s16.f16 d0, d1
+  vcvtn.s16.f16 q0, q1
+  vcvtn.u16.f16 d0, d1
+  vcvtn.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvtp.s16.f16 d0, d1
+  vcvtp.s16.f16 q0, q1
+  vcvtp.u16.f16 d0, d1
+  vcvtp.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vcvt.s16.f16 d0, d1, #1
+  vcvt.u16.f16 d0, d1, #2
+  vcvt.f16.s16 d0, d1, #3
+  vcvt.f16.u16 d0, d1, #4
+  vcvt.s16.f16 q0, q1, #5
+  vcvt.u16.f16 q0, q1, #6
+  vcvt.f16.s16 q0, q1, #7
+  vcvt.f16.u16 q0, q1, #8
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrinta.f16.f16 d0, d1
+  vrinta.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrintm.f16.f16 d0, d1
+  vrintm.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrintn.f16.f16 d0, d1
+  vrintn.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrintp.f16.f16 d0, d1
+  vrintp.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrintx.f16.f16 d0, d1
+  vrintx.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+  vrintz.f16.f16 d0, d1
+  vrintz.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
diff --git a/test/MC/ARM/fullfp16-neon.s b/test/MC/ARM/fullfp16-neon.s
new file mode 100644
index 0000000..32a3372
--- /dev/null
@@ -0,0 +1,404 @@
+@ RUN: llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s | FileCheck %s --check-prefix=ARM
+@ RUN: llvm-mc -triple thumbv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s | FileCheck %s --check-prefix=THUMB
+
+  vadd.f16 d0, d1, d2
+  vadd.f16 q0, q1, q2
+@ ARM:   vadd.f16        d0, d1, d2      @ encoding: [0x02,0x0d,0x11,0xf2]
+@ ARM:   vadd.f16        q0, q1, q2      @ encoding: [0x44,0x0d,0x12,0xf2]
+@ THUMB: vadd.f16        d0, d1, d2      @ encoding: [0x11,0xef,0x02,0x0d]
+@ THUMB: vadd.f16        q0, q1, q2      @ encoding: [0x12,0xef,0x44,0x0d]
+
+  vsub.f16 d0, d1, d2
+  vsub.f16 q0, q1, q2
+@ ARM:   vsub.f16        d0, d1, d2      @ encoding: [0x02,0x0d,0x31,0xf2]
+@ ARM:   vsub.f16        q0, q1, q2      @ encoding: [0x44,0x0d,0x32,0xf2]
+@ THUMB: vsub.f16        d0, d1, d2      @ encoding: [0x31,0xef,0x02,0x0d]
+@ THUMB: vsub.f16        q0, q1, q2      @ encoding: [0x32,0xef,0x44,0x0d]
+
+  vmul.f16 d0, d1, d2
+  vmul.f16 q0, q1, q2
+@ ARM:   vmul.f16        d0, d1, d2      @ encoding: [0x12,0x0d,0x11,0xf3]
+@ ARM:   vmul.f16        q0, q1, q2      @ encoding: [0x54,0x0d,0x12,0xf3]
+@ THUMB: vmul.f16        d0, d1, d2      @ encoding: [0x11,0xff,0x12,0x0d]
+@ THUMB: vmul.f16        q0, q1, q2      @ encoding: [0x12,0xff,0x54,0x0d]
+
+  vmul.f16 d1, d2, d3[2]
+  vmul.f16 q4, q5, d6[3]
+@ ARM:   vmul.f16        d1, d2, d3[2]   @ encoding: [0x63,0x19,0x92,0xf2]
+@ ARM:   vmul.f16        q4, q5, d6[3]   @ encoding: [0x6e,0x89,0x9a,0xf3]
+@ THUMB: vmul.f16        d1, d2, d3[2]   @ encoding: [0x92,0xef,0x63,0x19]
+@ THUMB: vmul.f16        q4, q5, d6[3]   @ encoding: [0x9a,0xff,0x6e,0x89]
+
+  vmla.f16 d0, d1, d2
+  vmla.f16 q0, q1, q2
+@ ARM:   vmla.f16        d0, d1, d2      @ encoding: [0x12,0x0d,0x11,0xf2]
+@ ARM:   vmla.f16        q0, q1, q2      @ encoding: [0x54,0x0d,0x12,0xf2]
+@ THUMB: vmla.f16        d0, d1, d2      @ encoding: [0x11,0xef,0x12,0x0d]
+@ THUMB: vmla.f16        q0, q1, q2      @ encoding: [0x12,0xef,0x54,0x0d]
+
+  vmla.f16 d5, d6, d7[2]
+  vmla.f16 q5, q6, d7[3]
+@ ARM:   vmla.f16        d5, d6, d7[2]   @ encoding: [0x67,0x51,0x96,0xf2]
+@ ARM:   vmla.f16        q5, q6, d7[3]   @ encoding: [0x6f,0xa1,0x9c,0xf3]
+@ THUMB: vmla.f16        d5, d6, d7[2]   @ encoding: [0x96,0xef,0x67,0x51]
+@ THUMB: vmla.f16        q5, q6, d7[3]   @ encoding: [0x9c,0xff,0x6f,0xa1]
+
+  vmls.f16 d0, d1, d2
+  vmls.f16 q0, q1, q2
+@ ARM:   vmls.f16        d0, d1, d2      @ encoding: [0x12,0x0d,0x31,0xf2]
+@ ARM:   vmls.f16        q0, q1, q2      @ encoding: [0x54,0x0d,0x32,0xf2]
+@ THUMB: vmls.f16        d0, d1, d2      @ encoding: [0x31,0xef,0x12,0x0d]
+@ THUMB: vmls.f16        q0, q1, q2      @ encoding: [0x32,0xef,0x54,0x0d]
+
+  vmls.f16 d5, d6, d7[2]
+  vmls.f16 q5, q6, d7[3]
+@ ARM:   vmls.f16        d5, d6, d7[2]   @ encoding: [0x67,0x55,0x96,0xf2]
+@ ARM:   vmls.f16        q5, q6, d7[3]   @ encoding: [0x6f,0xa5,0x9c,0xf3]
+@ THUMB: vmls.f16        d5, d6, d7[2]   @ encoding: [0x96,0xef,0x67,0x55]
+@ THUMB: vmls.f16        q5, q6, d7[3]   @ encoding: [0x9c,0xff,0x6f,0xa5]
+
+  vfma.f16 d0, d1, d2
+  vfma.f16 q0, q1, q2
+@ ARM:   vfma.f16        d0, d1, d2      @ encoding: [0x12,0x0c,0x11,0xf2]
+@ ARM:   vfma.f16        q0, q1, q2      @ encoding: [0x54,0x0c,0x12,0xf2]
+@ THUMB: vfma.f16        d0, d1, d2      @ encoding: [0x11,0xef,0x12,0x0c]
+@ THUMB: vfma.f16        q0, q1, q2      @ encoding: [0x12,0xef,0x54,0x0c]
+
+  vfms.f16 d0, d1, d2
+  vfms.f16 q0, q1, q2
+@ ARM:   vfms.f16        d0, d1, d2      @ encoding: [0x12,0x0c,0x31,0xf2]
+@ ARM:   vfms.f16        q0, q1, q2      @ encoding: [0x54,0x0c,0x32,0xf2]
+@ THUMB: vfms.f16        d0, d1, d2      @ encoding: [0x31,0xef,0x12,0x0c]
+@ THUMB: vfms.f16        q0, q1, q2      @ encoding: [0x32,0xef,0x54,0x0c]
+
+  vceq.f16 d2, d3, d4
+  vceq.f16 q2, q3, q4
+@ ARM:   vceq.f16        d2, d3, d4      @ encoding: [0x04,0x2e,0x13,0xf2]
+@ ARM:   vceq.f16        q2, q3, q4      @ encoding: [0x48,0x4e,0x16,0xf2]
+@ THUMB: vceq.f16        d2, d3, d4      @ encoding: [0x13,0xef,0x04,0x2e]
+@ THUMB: vceq.f16        q2, q3, q4      @ encoding: [0x16,0xef,0x48,0x4e]
+
+  vceq.f16 d2, d3, #0
+  vceq.f16 q2, q3, #0
+@ ARM:   vceq.f16        d2, d3, #0      @ encoding: [0x03,0x25,0xb5,0xf3]
+@ ARM:   vceq.f16        q2, q3, #0      @ encoding: [0x46,0x45,0xb5,0xf3]
+@ THUMB: vceq.f16        d2, d3, #0      @ encoding: [0xb5,0xff,0x03,0x25]
+@ THUMB: vceq.f16        q2, q3, #0      @ encoding: [0xb5,0xff,0x46,0x45]
+
+  vcge.f16 d2, d3, d4
+  vcge.f16 q2, q3, q4
+@ ARM:   vcge.f16        d2, d3, d4      @ encoding: [0x04,0x2e,0x13,0xf3]
+@ ARM:   vcge.f16        q2, q3, q4      @ encoding: [0x48,0x4e,0x16,0xf3]
+@ THUMB: vcge.f16        d2, d3, d4      @ encoding: [0x13,0xff,0x04,0x2e]
+@ THUMB: vcge.f16        q2, q3, q4      @ encoding: [0x16,0xff,0x48,0x4e]
+
+  vcge.f16 d2, d3, #0
+  vcge.f16 q2, q3, #0
+@ ARM:   vcge.f16        d2, d3, #0      @ encoding: [0x83,0x24,0xb5,0xf3]
+@ ARM:   vcge.f16        q2, q3, #0      @ encoding: [0xc6,0x44,0xb5,0xf3]
+@ THUMB: vcge.f16        d2, d3, #0      @ encoding: [0xb5,0xff,0x83,0x24]
+@ THUMB: vcge.f16        q2, q3, #0      @ encoding: [0xb5,0xff,0xc6,0x44]
+
+  vcgt.f16 d2, d3, d4
+  vcgt.f16 q2, q3, q4
+@ ARM:   vcgt.f16        d2, d3, d4      @ encoding: [0x04,0x2e,0x33,0xf3]
+@ ARM:   vcgt.f16        q2, q3, q4      @ encoding: [0x48,0x4e,0x36,0xf3]
+@ THUMB: vcgt.f16        d2, d3, d4      @ encoding: [0x33,0xff,0x04,0x2e]
+@ THUMB: vcgt.f16        q2, q3, q4      @ encoding: [0x36,0xff,0x48,0x4e]
+
+  vcgt.f16 d2, d3, #0
+  vcgt.f16 q2, q3, #0
+@ ARM:   vcgt.f16        d2, d3, #0      @ encoding: [0x03,0x24,0xb5,0xf3]
+@ ARM:   vcgt.f16        q2, q3, #0      @ encoding: [0x46,0x44,0xb5,0xf3]
+@ THUMB: vcgt.f16        d2, d3, #0      @ encoding: [0xb5,0xff,0x03,0x24]
+@ THUMB: vcgt.f16        q2, q3, #0      @ encoding: [0xb5,0xff,0x46,0x44]
+
+  vcle.f16 d2, d3, d4
+  vcle.f16 q2, q3, q4
+@ ARM:   vcge.f16        d2, d4, d3      @ encoding: [0x03,0x2e,0x14,0xf3]
+@ ARM:   vcge.f16        q2, q4, q3      @ encoding: [0x46,0x4e,0x18,0xf3]
+@ THUMB: vcge.f16        d2, d4, d3      @ encoding: [0x14,0xff,0x03,0x2e]
+@ THUMB: vcge.f16        q2, q4, q3      @ encoding: [0x18,0xff,0x46,0x4e]
+
+  vcle.f16 d2, d3, #0
+  vcle.f16 q2, q3, #0
+@ ARM:   vcle.f16        d2, d3, #0      @ encoding: [0x83,0x25,0xb5,0xf3]
+@ ARM:   vcle.f16        q2, q3, #0      @ encoding: [0xc6,0x45,0xb5,0xf3]
+@ THUMB: vcle.f16        d2, d3, #0      @ encoding: [0xb5,0xff,0x83,0x25]
+@ THUMB: vcle.f16        q2, q3, #0      @ encoding: [0xb5,0xff,0xc6,0x45]
+
+  vclt.f16 d2, d3, d4
+  vclt.f16 q2, q3, q4
+@ ARM:   vcgt.f16        d2, d4, d3      @ encoding: [0x03,0x2e,0x34,0xf3]
+@ ARM:   vcgt.f16        q2, q4, q3      @ encoding: [0x46,0x4e,0x38,0xf3]
+@ THUMB: vcgt.f16        d2, d4, d3      @ encoding: [0x34,0xff,0x03,0x2e]
+@ THUMB: vcgt.f16        q2, q4, q3      @ encoding: [0x38,0xff,0x46,0x4e]
+
+  vclt.f16 d2, d3, #0
+  vclt.f16 q2, q3, #0
+@ ARM:   vclt.f16        d2, d3, #0      @ encoding: [0x03,0x26,0xb5,0xf3]
+@ ARM:   vclt.f16        q2, q3, #0      @ encoding: [0x46,0x46,0xb5,0xf3]
+@ THUMB: vclt.f16        d2, d3, #0      @ encoding: [0xb5,0xff,0x03,0x26]
+@ THUMB: vclt.f16        q2, q3, #0      @ encoding: [0xb5,0xff,0x46,0x46]
+
+  vacge.f16 d0, d1, d2
+  vacge.f16 q0, q1, q2
+@ ARM:   vacge.f16       d0, d1, d2      @ encoding: [0x12,0x0e,0x11,0xf3]
+@ ARM:   vacge.f16       q0, q1, q2      @ encoding: [0x54,0x0e,0x12,0xf3]
+@ THUMB: vacge.f16       d0, d1, d2      @ encoding: [0x11,0xff,0x12,0x0e]
+@ THUMB: vacge.f16       q0, q1, q2      @ encoding: [0x12,0xff,0x54,0x0e]
+
+  vacgt.f16 d0, d1, d2
+  vacgt.f16 q0, q1, q2
+@ ARM:   vacgt.f16       d0, d1, d2      @ encoding: [0x12,0x0e,0x31,0xf3]
+@ ARM:   vacgt.f16       q0, q1, q2      @ encoding: [0x54,0x0e,0x32,0xf3]
+@ THUMB: vacgt.f16       d0, d1, d2      @ encoding: [0x31,0xff,0x12,0x0e]
+@ THUMB: vacgt.f16       q0, q1, q2      @ encoding: [0x32,0xff,0x54,0x0e]
+
+  vacle.f16 d0, d1, d2
+  vacle.f16 q0, q1, q2
+@ ARM:   vacge.f16       d0, d2, d1      @ encoding: [0x11,0x0e,0x12,0xf3]
+@ ARM:   vacge.f16       q0, q2, q1      @ encoding: [0x52,0x0e,0x14,0xf3]
+@ THUMB: vacge.f16       d0, d2, d1      @ encoding: [0x12,0xff,0x11,0x0e]
+@ THUMB: vacge.f16       q0, q2, q1      @ encoding: [0x14,0xff,0x52,0x0e]
+
+  vaclt.f16 d0, d1, d2
+  vaclt.f16 q0, q1, q2
+@ ARM:   vacgt.f16       d0, d2, d1      @ encoding: [0x11,0x0e,0x32,0xf3]
+@ ARM:   vacgt.f16       q0, q2, q1      @ encoding: [0x52,0x0e,0x34,0xf3]
+@ THUMB: vacgt.f16       d0, d2, d1      @ encoding: [0x32,0xff,0x11,0x0e]
+@ THUMB: vacgt.f16       q0, q2, q1      @ encoding: [0x34,0xff,0x52,0x0e]
+
+  vabd.f16 d0, d1, d2
+  vabd.f16 q0, q1, q2
+@ ARM:   vabd.f16        d0, d1, d2      @ encoding: [0x02,0x0d,0x31,0xf3]
+@ ARM:   vabd.f16        q0, q1, q2      @ encoding: [0x44,0x0d,0x32,0xf3]
+@ THUMB: vabd.f16        d0, d1, d2      @ encoding: [0x31,0xff,0x02,0x0d]
+@ THUMB: vabd.f16        q0, q1, q2      @ encoding: [0x32,0xff,0x44,0x0d]
+
+  vabs.f16 d0, d1
+  vabs.f16 q0, q1
+@ ARM:   vabs.f16        d0, d1          @ encoding: [0x01,0x07,0xb5,0xf3]
+@ ARM:   vabs.f16        q0, q1          @ encoding: [0x42,0x07,0xb5,0xf3]
+@ THUMB: vabs.f16        d0, d1          @ encoding: [0xb5,0xff,0x01,0x07]
+@ THUMB: vabs.f16        q0, q1          @ encoding: [0xb5,0xff,0x42,0x07]
+
+  vmax.f16 d0, d1, d2
+  vmax.f16 q0, q1, q2
+@ ARM:   vmax.f16        d0, d1, d2      @ encoding: [0x02,0x0f,0x11,0xf2]
+@ ARM:   vmax.f16        q0, q1, q2      @ encoding: [0x44,0x0f,0x12,0xf2]
+@ THUMB: vmax.f16        d0, d1, d2      @ encoding: [0x11,0xef,0x02,0x0f]
+@ THUMB: vmax.f16        q0, q1, q2      @ encoding: [0x12,0xef,0x44,0x0f]
+
+  vmin.f16 d0, d1, d2
+  vmin.f16 q0, q1, q2
+@ ARM:   vmin.f16        d0, d1, d2      @ encoding: [0x02,0x0f,0x31,0xf2]
+@ ARM:   vmin.f16        q0, q1, q2      @ encoding: [0x44,0x0f,0x32,0xf2]
+@ THUMB: vmin.f16        d0, d1, d2      @ encoding: [0x31,0xef,0x02,0x0f]
+@ THUMB: vmin.f16        q0, q1, q2      @ encoding: [0x32,0xef,0x44,0x0f]
+
+  vmaxnm.f16 d0, d1, d2
+  vmaxnm.f16 q0, q1, q2
+@ ARM:   vmaxnm.f16      d0, d1, d2      @ encoding: [0x12,0x0f,0x11,0xf3]
+@ ARM:   vmaxnm.f16      q0, q1, q2      @ encoding: [0x54,0x0f,0x12,0xf3]
+@ THUMB: vmaxnm.f16      d0, d1, d2      @ encoding: [0x11,0xff,0x12,0x0f]
+@ THUMB: vmaxnm.f16      q0, q1, q2      @ encoding: [0x12,0xff,0x54,0x0f]
+
+  vminnm.f16 d0, d1, d2
+  vminnm.f16 q0, q1, q2
+@ ARM:   vminnm.f16      d0, d1, d2      @ encoding: [0x12,0x0f,0x31,0xf3]
+@ ARM:   vminnm.f16      q0, q1, q2      @ encoding: [0x54,0x0f,0x32,0xf3]
+@ THUMB: vminnm.f16      d0, d1, d2      @ encoding: [0x31,0xff,0x12,0x0f]
+@ THUMB: vminnm.f16      q0, q1, q2      @ encoding: [0x32,0xff,0x54,0x0f]
+
+  vpadd.f16 d0, d1, d2
+@ ARM:   vpadd.f16       d0, d1, d2      @ encoding: [0x02,0x0d,0x11,0xf3]
+@ THUMB: vpadd.f16       d0, d1, d2      @ encoding: [0x11,0xff,0x02,0x0d]
+
+  vpmax.f16 d0, d1, d2
+@ ARM:   vpmax.f16       d0, d1, d2      @ encoding: [0x02,0x0f,0x11,0xf3]
+@ THUMB: vpmax.f16       d0, d1, d2      @ encoding: [0x11,0xff,0x02,0x0f]
+
+  vpmin.f16 d0, d1, d2
+@ ARM:   vpmin.f16       d0, d1, d2      @ encoding: [0x02,0x0f,0x31,0xf3]
+@ THUMB: vpmin.f16       d0, d1, d2      @ encoding: [0x31,0xff,0x02,0x0f]
+
+  vrecpe.f16 d0, d1
+  vrecpe.f16 q0, q1
+@ ARM:   vrecpe.f16      d0, d1          @ encoding: [0x01,0x05,0xb7,0xf3]
+@ ARM:   vrecpe.f16      q0, q1          @ encoding: [0x42,0x05,0xb7,0xf3]
+@ THUMB: vrecpe.f16      d0, d1          @ encoding: [0xb7,0xff,0x01,0x05]
+@ THUMB: vrecpe.f16      q0, q1          @ encoding: [0xb7,0xff,0x42,0x05]
+
+  vrecps.f16 d0, d1, d2
+  vrecps.f16 q0, q1, q2
+@ ARM:   vrecps.f16      d0, d1, d2      @ encoding: [0x12,0x0f,0x11,0xf2]
+@ ARM:   vrecps.f16      q0, q1, q2      @ encoding: [0x54,0x0f,0x12,0xf2]
+@ THUMB: vrecps.f16      d0, d1, d2      @ encoding: [0x11,0xef,0x12,0x0f]
+@ THUMB: vrecps.f16      q0, q1, q2      @ encoding: [0x12,0xef,0x54,0x0f]
+
+  vrsqrte.f16 d0, d1
+  vrsqrte.f16 q0, q1
+@ ARM:   vrsqrte.f16     d0, d1          @ encoding: [0x81,0x05,0xb7,0xf3]
+@ ARM:   vrsqrte.f16     q0, q1          @ encoding: [0xc2,0x05,0xb7,0xf3]
+@ THUMB: vrsqrte.f16     d0, d1          @ encoding: [0xb7,0xff,0x81,0x05]
+@ THUMB: vrsqrte.f16     q0, q1          @ encoding: [0xb7,0xff,0xc2,0x05]
+
+  vrsqrts.f16 d0, d1, d2
+  vrsqrts.f16 q0, q1, q2
+@ ARM:   vrsqrts.f16     d0, d1, d2      @ encoding: [0x12,0x0f,0x31,0xf2]
+@ ARM:   vrsqrts.f16     q0, q1, q2      @ encoding: [0x54,0x0f,0x32,0xf2]
+@ THUMB: vrsqrts.f16     d0, d1, d2      @ encoding: [0x31,0xef,0x12,0x0f]
+@ THUMB: vrsqrts.f16     q0, q1, q2      @ encoding: [0x32,0xef,0x54,0x0f]
+
+  vneg.f16 d0, d1
+  vneg.f16 q0, q1
+@ ARM:   vneg.f16        d0, d1          @ encoding: [0x81,0x07,0xb5,0xf3]
+@ ARM:   vneg.f16        q0, q1          @ encoding: [0xc2,0x07,0xb5,0xf3]
+@ THUMB: vneg.f16        d0, d1          @ encoding: [0xb5,0xff,0x81,0x07]
+@ THUMB: vneg.f16        q0, q1          @ encoding: [0xb5,0xff,0xc2,0x07]
+
+  vcvt.s16.f16 d0, d1
+  vcvt.u16.f16 d0, d1
+  vcvt.f16.s16 d0, d1
+  vcvt.f16.u16 d0, d1
+  vcvt.s16.f16 q0, q1
+  vcvt.u16.f16 q0, q1
+  vcvt.f16.s16 q0, q1
+  vcvt.f16.u16 q0, q1
+@ ARM:   vcvt.s16.f16    d0, d1          @ encoding: [0x01,0x07,0xb7,0xf3]
+@ ARM:   vcvt.u16.f16    d0, d1          @ encoding: [0x81,0x07,0xb7,0xf3]
+@ ARM:   vcvt.f16.s16    d0, d1          @ encoding: [0x01,0x06,0xb7,0xf3]
+@ ARM:   vcvt.f16.u16    d0, d1          @ encoding: [0x81,0x06,0xb7,0xf3]
+@ ARM:   vcvt.s16.f16    q0, q1          @ encoding: [0x42,0x07,0xb7,0xf3]
+@ ARM:   vcvt.u16.f16    q0, q1          @ encoding: [0xc2,0x07,0xb7,0xf3]
+@ ARM:   vcvt.f16.s16    q0, q1          @ encoding: [0x42,0x06,0xb7,0xf3]
+@ ARM:   vcvt.f16.u16    q0, q1          @ encoding: [0xc2,0x06,0xb7,0xf3]
+@ THUMB: vcvt.s16.f16    d0, d1          @ encoding: [0xb7,0xff,0x01,0x07]
+@ THUMB: vcvt.u16.f16    d0, d1          @ encoding: [0xb7,0xff,0x81,0x07]
+@ THUMB: vcvt.f16.s16    d0, d1          @ encoding: [0xb7,0xff,0x01,0x06]
+@ THUMB: vcvt.f16.u16    d0, d1          @ encoding: [0xb7,0xff,0x81,0x06]
+@ THUMB: vcvt.s16.f16    q0, q1          @ encoding: [0xb7,0xff,0x42,0x07]
+@ THUMB: vcvt.u16.f16    q0, q1          @ encoding: [0xb7,0xff,0xc2,0x07]
+@ THUMB: vcvt.f16.s16    q0, q1          @ encoding: [0xb7,0xff,0x42,0x06]
+@ THUMB: vcvt.f16.u16    q0, q1          @ encoding: [0xb7,0xff,0xc2,0x06]
+
+  vcvta.s16.f16 d0, d1
+  vcvta.s16.f16 q0, q1
+  vcvta.u16.f16 d0, d1
+  vcvta.u16.f16 q0, q1
+@ ARM:   vcvta.s16.f16   d0, d1          @ encoding: [0x01,0x00,0xb7,0xf3]
+@ ARM:   vcvta.s16.f16   q0, q1          @ encoding: [0x42,0x00,0xb7,0xf3]
+@ ARM:   vcvta.u16.f16   d0, d1          @ encoding: [0x81,0x00,0xb7,0xf3]
+@ ARM:   vcvta.u16.f16   q0, q1          @ encoding: [0xc2,0x00,0xb7,0xf3]
+@ THUMB: vcvta.s16.f16   d0, d1          @ encoding: [0xb7,0xff,0x01,0x00]
+@ THUMB: vcvta.s16.f16   q0, q1          @ encoding: [0xb7,0xff,0x42,0x00]
+@ THUMB: vcvta.u16.f16   d0, d1          @ encoding: [0xb7,0xff,0x81,0x00]
+@ THUMB: vcvta.u16.f16   q0, q1          @ encoding: [0xb7,0xff,0xc2,0x00]
+
+  vcvtm.s16.f16 d0, d1
+  vcvtm.s16.f16 q0, q1
+  vcvtm.u16.f16 d0, d1
+  vcvtm.u16.f16 q0, q1
+@ ARM:   vcvtm.s16.f16   d0, d1          @ encoding: [0x01,0x03,0xb7,0xf3]
+@ ARM:   vcvtm.s16.f16   q0, q1          @ encoding: [0x42,0x03,0xb7,0xf3]
+@ ARM:   vcvtm.u16.f16   d0, d1          @ encoding: [0x81,0x03,0xb7,0xf3]
+@ ARM:   vcvtm.u16.f16   q0, q1          @ encoding: [0xc2,0x03,0xb7,0xf3]
+@ THUMB: vcvtm.s16.f16   d0, d1          @ encoding: [0xb7,0xff,0x01,0x03]
+@ THUMB: vcvtm.s16.f16   q0, q1          @ encoding: [0xb7,0xff,0x42,0x03]
+@ THUMB: vcvtm.u16.f16   d0, d1          @ encoding: [0xb7,0xff,0x81,0x03]
+@ THUMB: vcvtm.u16.f16   q0, q1          @ encoding: [0xb7,0xff,0xc2,0x03]
+
+  vcvtn.s16.f16 d0, d1
+  vcvtn.s16.f16 q0, q1
+  vcvtn.u16.f16 d0, d1
+  vcvtn.u16.f16 q0, q1
+@ ARM:   vcvtn.s16.f16   d0, d1          @ encoding: [0x01,0x01,0xb7,0xf3]
+@ ARM:   vcvtn.s16.f16   q0, q1          @ encoding: [0x42,0x01,0xb7,0xf3]
+@ ARM:   vcvtn.u16.f16   d0, d1          @ encoding: [0x81,0x01,0xb7,0xf3]
+@ ARM:   vcvtn.u16.f16   q0, q1          @ encoding: [0xc2,0x01,0xb7,0xf3]
+@ THUMB: vcvtn.s16.f16   d0, d1          @ encoding: [0xb7,0xff,0x01,0x01]
+@ THUMB: vcvtn.s16.f16   q0, q1          @ encoding: [0xb7,0xff,0x42,0x01]
+@ THUMB: vcvtn.u16.f16   d0, d1          @ encoding: [0xb7,0xff,0x81,0x01]
+@ THUMB: vcvtn.u16.f16   q0, q1          @ encoding: [0xb7,0xff,0xc2,0x01]
+
+  vcvtp.s16.f16 d0, d1
+  vcvtp.s16.f16 q0, q1
+  vcvtp.u16.f16 d0, d1
+  vcvtp.u16.f16 q0, q1
+@ ARM:   vcvtp.s16.f16   d0, d1          @ encoding: [0x01,0x02,0xb7,0xf3]
+@ ARM:   vcvtp.s16.f16   q0, q1          @ encoding: [0x42,0x02,0xb7,0xf3]
+@ ARM:   vcvtp.u16.f16   d0, d1          @ encoding: [0x81,0x02,0xb7,0xf3]
+@ ARM:   vcvtp.u16.f16   q0, q1          @ encoding: [0xc2,0x02,0xb7,0xf3]
+@ THUMB: vcvtp.s16.f16   d0, d1          @ encoding: [0xb7,0xff,0x01,0x02]
+@ THUMB: vcvtp.s16.f16   q0, q1          @ encoding: [0xb7,0xff,0x42,0x02]
+@ THUMB: vcvtp.u16.f16   d0, d1          @ encoding: [0xb7,0xff,0x81,0x02]
+@ THUMB: vcvtp.u16.f16   q0, q1          @ encoding: [0xb7,0xff,0xc2,0x02]
+
+
+  vcvt.s16.f16 d0, d1, #1
+  vcvt.u16.f16 d0, d1, #2
+  vcvt.f16.s16 d0, d1, #3
+  vcvt.f16.u16 d0, d1, #4
+  vcvt.s16.f16 q0, q1, #5
+  vcvt.u16.f16 q0, q1, #6
+  vcvt.f16.s16 q0, q1, #7
+  vcvt.f16.u16 q0, q1, #8
+@ ARM:   vcvt.s16.f16    d0, d1, #1      @ encoding: [0x11,0x0d,0xbf,0xf2]
+@ ARM:   vcvt.u16.f16    d0, d1, #2      @ encoding: [0x11,0x0d,0xbe,0xf3]
+@ ARM:   vcvt.f16.s16    d0, d1, #3      @ encoding: [0x11,0x0c,0xbd,0xf2]
+@ ARM:   vcvt.f16.u16    d0, d1, #4      @ encoding: [0x11,0x0c,0xbc,0xf3]
+@ ARM:   vcvt.s16.f16    q0, q1, #5      @ encoding: [0x52,0x0d,0xbb,0xf2]
+@ ARM:   vcvt.u16.f16    q0, q1, #6      @ encoding: [0x52,0x0d,0xba,0xf3]
+@ ARM:   vcvt.f16.s16    q0, q1, #7      @ encoding: [0x52,0x0c,0xb9,0xf2]
+@ ARM:   vcvt.f16.u16    q0, q1, #8      @ encoding: [0x52,0x0c,0xb8,0xf3]
+@ THUMB: vcvt.s16.f16    d0, d1, #1      @ encoding: [0xbf,0xef,0x11,0x0d]
+@ THUMB: vcvt.u16.f16    d0, d1, #2      @ encoding: [0xbe,0xff,0x11,0x0d]
+@ THUMB: vcvt.f16.s16    d0, d1, #3      @ encoding: [0xbd,0xef,0x11,0x0c]
+@ THUMB: vcvt.f16.u16    d0, d1, #4      @ encoding: [0xbc,0xff,0x11,0x0c]
+@ THUMB: vcvt.s16.f16    q0, q1, #5      @ encoding: [0xbb,0xef,0x52,0x0d]
+@ THUMB: vcvt.u16.f16    q0, q1, #6      @ encoding: [0xba,0xff,0x52,0x0d]
+@ THUMB: vcvt.f16.s16    q0, q1, #7      @ encoding: [0xb9,0xef,0x52,0x0c]
+@ THUMB: vcvt.f16.u16    q0, q1, #8      @ encoding: [0xb8,0xff,0x52,0x0c]
+
+  vrinta.f16.f16 d0, d1
+  vrinta.f16.f16 q0, q1
+@ ARM:   vrinta.f16      d0, d1          @ encoding: [0x01,0x05,0xb6,0xf3]
+@ ARM:   vrinta.f16      q0, q1          @ encoding: [0x42,0x05,0xb6,0xf3]
+@ THUMB: vrinta.f16      d0, d1          @ encoding: [0xb6,0xff,0x01,0x05]
+@ THUMB: vrinta.f16      q0, q1          @ encoding: [0xb6,0xff,0x42,0x05]
+
+  vrintm.f16.f16 d0, d1
+  vrintm.f16.f16 q0, q1
+@ ARM:   vrintm.f16      d0, d1          @ encoding: [0x81,0x06,0xb6,0xf3]
+@ ARM:   vrintm.f16      q0, q1          @ encoding: [0xc2,0x06,0xb6,0xf3]
+@ THUMB: vrintm.f16      d0, d1          @ encoding: [0xb6,0xff,0x81,0x06]
+@ THUMB: vrintm.f16      q0, q1          @ encoding: [0xb6,0xff,0xc2,0x06]
+
+  vrintn.f16.f16 d0, d1
+  vrintn.f16.f16 q0, q1
+@ ARM:   vrintn.f16      d0, d1          @ encoding: [0x01,0x04,0xb6,0xf3]
+@ ARM:   vrintn.f16      q0, q1          @ encoding: [0x42,0x04,0xb6,0xf3]
+@ THUMB: vrintn.f16      d0, d1          @ encoding: [0xb6,0xff,0x01,0x04]
+@ THUMB: vrintn.f16      q0, q1          @ encoding: [0xb6,0xff,0x42,0x04]
+
+  vrintp.f16.f16 d0, d1
+  vrintp.f16.f16 q0, q1
+@ ARM:   vrintp.f16      d0, d1          @ encoding: [0x81,0x07,0xb6,0xf3]
+@ ARM:   vrintp.f16      q0, q1          @ encoding: [0xc2,0x07,0xb6,0xf3]
+@ THUMB: vrintp.f16      d0, d1          @ encoding: [0xb6,0xff,0x81,0x07]
+@ THUMB: vrintp.f16      q0, q1          @ encoding: [0xb6,0xff,0xc2,0x07]
+
+  vrintx.f16.f16 d0, d1
+  vrintx.f16.f16 q0, q1
+@ ARM:   vrintx.f16      d0, d1          @ encoding: [0x81,0x04,0xb6,0xf3]
+@ ARM:   vrintx.f16      q0, q1          @ encoding: [0xc2,0x04,0xb6,0xf3]
+@ THUMB: vrintx.f16      d0, d1          @ encoding: [0xb6,0xff,0x81,0x04]
+@ THUMB: vrintx.f16      q0, q1          @ encoding: [0xb6,0xff,0xc2,0x04]
+
+  vrintz.f16.f16 d0, d1
+  vrintz.f16.f16 q0, q1
+@ ARM:   vrintz.f16      d0, d1          @ encoding: [0x81,0x05,0xb6,0xf3]
+@ ARM:   vrintz.f16      q0, q1          @ encoding: [0xc2,0x05,0xb6,0xf3]
+@ THUMB: vrintz.f16      d0, d1          @ encoding: [0xb6,0xff,0x81,0x05]
+@ THUMB: vrintz.f16      q0, q1          @ encoding: [0xb6,0xff,0xc2,0x05]
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt b/test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt
new file mode 100644 (file)
index 0000000..9c8be17
--- /dev/null
@@ -0,0 +1,274 @@
+# RUN: not llvm-mc -disassemble -triple armv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
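+# These byte patterns are the ARM encodings of the FP16 NEON instructions
+# accepted in fullfp16-neon.s; with fullfp16 disabled, every one of them must
+# fail to decode.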
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x11,0xf2]
+[0x44,0x0d,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x31,0xf2]
+[0x44,0x0d,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x11,0xf3]
+[0x54,0x0d,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x63,0x19,0x92,0xf2]
+[0x6e,0x89,0x9a,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x11,0xf2]
+[0x54,0x0d,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x67,0x51,0x96,0xf2]
+[0x6f,0xa1,0x9c,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x31,0xf2]
+[0x54,0x0d,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x67,0x55,0x96,0xf2]
+[0x6f,0xa5,0x9c,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0c,0x11,0xf2]
+[0x54,0x0c,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0c,0x31,0xf2]
+[0x54,0x0c,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x13,0xf2]
+[0x48,0x4e,0x16,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0xb5,0xf3]
+[0x46,0x45,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x13,0xf3]
+[0x48,0x4e,0x16,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x83,0x24,0xb5,0xf3]
+[0xc6,0x44,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x33,0xf3]
+[0x48,0x4e,0x36,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x24,0xb5,0xf3]
+[0x46,0x44,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x83,0x25,0xb5,0xf3]
+[0xc6,0x45,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x26,0xb5,0xf3]
+[0x46,0x46,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0e,0x11,0xf3]
+[0x54,0x0e,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0e,0x31,0xf3]
+[0x54,0x0e,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x31,0xf3]
+[0x44,0x0d,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x07,0xb5,0xf3]
+[0x42,0x07,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x11,0xf2]
+[0x44,0x0f,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x31,0xf2]
+[0x44,0x0f,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x11,0xf3]
+[0x54,0x0f,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x31,0xf3]
+[0x54,0x0f,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x11,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x11,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x31,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x05,0xb7,0xf3]
+[0x42,0x05,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x11,0xf2]
+[0x54,0x0f,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x05,0xb7,0xf3]
+[0xc2,0x05,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x31,0xf2]
+[0x54,0x0f,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x07,0xb5,0xf3]
+[0xc2,0x07,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x07,0xb7,0xf3]
+[0x81,0x07,0xb7,0xf3]
+[0x01,0x06,0xb7,0xf3]
+[0x81,0x06,0xb7,0xf3]
+[0x42,0x07,0xb7,0xf3]
+[0xc2,0x07,0xb7,0xf3]
+[0x42,0x06,0xb7,0xf3]
+[0xc2,0x06,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x00,0xb7,0xf3]
+[0x42,0x00,0xb7,0xf3]
+[0x81,0x00,0xb7,0xf3]
+[0xc2,0x00,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x03,0xb7,0xf3]
+[0x42,0x03,0xb7,0xf3]
+[0x81,0x03,0xb7,0xf3]
+[0xc2,0x03,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x01,0xb7,0xf3]
+[0x42,0x01,0xb7,0xf3]
+[0x81,0x01,0xb7,0xf3]
+[0xc2,0x01,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x02,0xb7,0xf3]
+[0x42,0x02,0xb7,0xf3]
+[0x81,0x02,0xb7,0xf3]
+[0xc2,0x02,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0x0d,0xbf,0xf2]
+[0x11,0x0d,0xbe,0xf3]
+[0x11,0x0c,0xbd,0xf2]
+[0x11,0x0c,0xbc,0xf3]
+[0x52,0x0d,0xbb,0xf2]
+[0x52,0x0d,0xba,0xf3]
+[0x52,0x0c,0xb9,0xf2]
+[0x52,0x0c,0xb8,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x05,0xb6,0xf3]
+[0x42,0x05,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x06,0xb6,0xf3]
+[0xc2,0x06,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x04,0xb6,0xf3]
+[0x42,0x04,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x07,0xb6,0xf3]
+[0xc2,0x07,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x04,0xb6,0xf3]
+[0xc2,0x04,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x05,0xb6,0xf3]
+[0xc2,0x05,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0d,0xc7,0xf2]
+[0x20,0x0f,0xc7,0xf2]
+[0x70,0x0f,0xc7,0xf2]
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-arm.txt b/test/MC/Disassembler/ARM/fullfp16-neon-arm.txt
new file mode 100644 (file)
index 0000000..232bd64
--- /dev/null
@@ -0,0 +1,309 @@
+# RUN: not llvm-mc -disassemble -triple armv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s 2>%t | FileCheck %s
+# RUN: FileCheck %s < %t --check-prefix=STDERR
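+# The valid encodings are checked on the disassembler's stdout; stderr is
+# captured in %t and checked with the STDERR prefix for the encodings at the
+# end of the file that must not decode even with +fullfp16.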
+
+# CHECK: vadd.f16 d0, d1, d2
+# CHECK: vadd.f16 q0, q1, q2
+[0x02,0x0d,0x11,0xf2]
+[0x44,0x0d,0x12,0xf2]
+
+# CHECK: vsub.f16 d0, d1, d2
+# CHECK: vsub.f16 q0, q1, q2
+[0x02,0x0d,0x31,0xf2]
+[0x44,0x0d,0x32,0xf2]
+
+# CHECK: vmul.f16 d0, d1, d2
+# CHECK: vmul.f16 q0, q1, q2
+[0x12,0x0d,0x11,0xf3]
+[0x54,0x0d,0x12,0xf3]
+
+# CHECK: vmul.f16 d1, d2, d3[2]
+# CHECK: vmul.f16 q4, q5, d6[3]
+[0x63,0x19,0x92,0xf2]
+[0x6e,0x89,0x9a,0xf3]
+
+# CHECK: vmla.f16 d0, d1, d2
+# CHECK: vmla.f16 q0, q1, q2
+[0x12,0x0d,0x11,0xf2]
+[0x54,0x0d,0x12,0xf2]
+
+# CHECK: vmla.f16 d5, d6, d7[2]
+# CHECK: vmla.f16 q5, q6, d7[3]
+[0x67,0x51,0x96,0xf2]
+[0x6f,0xa1,0x9c,0xf3]
+
+# CHECK: vmls.f16 d0, d1, d2
+# CHECK: vmls.f16 q0, q1, q2
+[0x12,0x0d,0x31,0xf2]
+[0x54,0x0d,0x32,0xf2]
+
+# CHECK: vmls.f16 d5, d6, d7[2]
+# CHECK: vmls.f16 q5, q6, d7[3]
+[0x67,0x55,0x96,0xf2]
+[0x6f,0xa5,0x9c,0xf3]
+
+# CHECK: vfma.f16 d0, d1, d2
+# CHECK: vfma.f16 q0, q1, q2
+[0x12,0x0c,0x11,0xf2]
+[0x54,0x0c,0x12,0xf2]
+
+# CHECK: vfms.f16 d0, d1, d2
+# CHECK: vfms.f16 q0, q1, q2
+[0x12,0x0c,0x31,0xf2]
+[0x54,0x0c,0x32,0xf2]
+
+# CHECK: vceq.f16 d2, d3, d4
+# CHECK: vceq.f16 q2, q3, q4
+[0x04,0x2e,0x13,0xf2]
+[0x48,0x4e,0x16,0xf2]
+
+# CHECK: vceq.f16 d2, d3, #0
+# CHECK: vceq.f16 q2, q3, #0
+[0x03,0x25,0xb5,0xf3]
+[0x46,0x45,0xb5,0xf3]
+
+# CHECK: vcge.f16 d2, d3, d4
+# CHECK: vcge.f16 q2, q3, q4
+[0x04,0x2e,0x13,0xf3]
+[0x48,0x4e,0x16,0xf3]
+
+# CHECK: vcge.f16 d2, d3, #0
+# CHECK: vcge.f16 q2, q3, #0
+[0x83,0x24,0xb5,0xf3]
+[0xc6,0x44,0xb5,0xf3]
+
+# CHECK: vcgt.f16 d2, d3, d4
+# CHECK: vcgt.f16 q2, q3, q4
+[0x04,0x2e,0x33,0xf3]
+[0x48,0x4e,0x36,0xf3]
+
+# CHECK: vcgt.f16 d2, d3, #0
+# CHECK: vcgt.f16 q2, q3, #0
+[0x03,0x24,0xb5,0xf3]
+[0x46,0x44,0xb5,0xf3]
+
+# CHECK: vcle.f16 d2, d3, #0
+# CHECK: vcle.f16 q2, q3, #0
+[0x83,0x25,0xb5,0xf3]
+[0xc6,0x45,0xb5,0xf3]
+
+# CHECK: vclt.f16 d2, d3, #0
+# CHECK: vclt.f16 q2, q3, #0
+[0x03,0x26,0xb5,0xf3]
+[0x46,0x46,0xb5,0xf3]
+
+# CHECK: vacge.f16 d0, d1, d2
+# CHECK: vacge.f16 q0, q1, q2
+[0x12,0x0e,0x11,0xf3]
+[0x54,0x0e,0x12,0xf3]
+
+# CHECK: vacgt.f16 d0, d1, d2
+# CHECK: vacgt.f16 q0, q1, q2
+[0x12,0x0e,0x31,0xf3]
+[0x54,0x0e,0x32,0xf3]
+
+# CHECK: vabd.f16 d0, d1, d2
+# CHECK: vabd.f16 q0, q1, q2
+[0x02,0x0d,0x31,0xf3]
+[0x44,0x0d,0x32,0xf3]
+
+# CHECK: vabs.f16 d0, d1
+# CHECK: vabs.f16 q0, q1
+[0x01,0x07,0xb5,0xf3]
+[0x42,0x07,0xb5,0xf3]
+
+# CHECK: vmax.f16 d0, d1, d2
+# CHECK: vmax.f16 q0, q1, q2
+[0x02,0x0f,0x11,0xf2]
+[0x44,0x0f,0x12,0xf2]
+
+# CHECK: vmin.f16 d0, d1, d2
+# CHECK: vmin.f16 q0, q1, q2
+[0x02,0x0f,0x31,0xf2]
+[0x44,0x0f,0x32,0xf2]
+
+# CHECK: vmaxnm.f16 d0, d1, d2
+# CHECK: vmaxnm.f16 q0, q1, q2
+[0x12,0x0f,0x11,0xf3]
+[0x54,0x0f,0x12,0xf3]
+
+# CHECK: vminnm.f16 d0, d1, d2
+# CHECK: vminnm.f16 q0, q1, q2
+[0x12,0x0f,0x31,0xf3]
+[0x54,0x0f,0x32,0xf3]
+
+# CHECK: vpadd.f16 d0, d1, d2
+[0x02,0x0d,0x11,0xf3]
+
+# CHECK: vpmax.f16 d0, d1, d2
+[0x02,0x0f,0x11,0xf3]
+
+# CHECK: vpmin.f16 d0, d1, d2
+[0x02,0x0f,0x31,0xf3]
+
+# CHECK: vrecpe.f16 d0, d1
+# CHECK: vrecpe.f16 q0, q1
+[0x01,0x05,0xb7,0xf3]
+[0x42,0x05,0xb7,0xf3]
+
+# CHECK: vrecps.f16 d0, d1, d2
+# CHECK: vrecps.f16 q0, q1, q2
+[0x12,0x0f,0x11,0xf2]
+[0x54,0x0f,0x12,0xf2]
+
+# CHECK: vrsqrte.f16 d0, d1
+# CHECK: vrsqrte.f16 q0, q1
+[0x81,0x05,0xb7,0xf3]
+[0xc2,0x05,0xb7,0xf3]
+
+# CHECK: vrsqrts.f16 d0, d1, d2
+# CHECK: vrsqrts.f16 q0, q1, q2
+[0x12,0x0f,0x31,0xf2]
+[0x54,0x0f,0x32,0xf2]
+
+# CHECK: vneg.f16 d0, d1
+# CHECK: vneg.f16 q0, q1
+[0x81,0x07,0xb5,0xf3]
+[0xc2,0x07,0xb5,0xf3]
+
+# CHECK: vcvt.s16.f16 d0, d1
+# CHECK: vcvt.u16.f16 d0, d1
+# CHECK: vcvt.f16.s16 d0, d1
+# CHECK: vcvt.f16.u16 d0, d1
+# CHECK: vcvt.s16.f16 q0, q1
+# CHECK: vcvt.u16.f16 q0, q1
+# CHECK: vcvt.f16.s16 q0, q1
+# CHECK: vcvt.f16.u16 q0, q1
+[0x01,0x07,0xb7,0xf3]
+[0x81,0x07,0xb7,0xf3]
+[0x01,0x06,0xb7,0xf3]
+[0x81,0x06,0xb7,0xf3]
+[0x42,0x07,0xb7,0xf3]
+[0xc2,0x07,0xb7,0xf3]
+[0x42,0x06,0xb7,0xf3]
+[0xc2,0x06,0xb7,0xf3]
+
+# CHECK: vcvta.s16.f16 d0, d1
+# CHECK: vcvta.s16.f16 q0, q1
+# CHECK: vcvta.u16.f16 d0, d1
+# CHECK: vcvta.u16.f16 q0, q1
+[0x01,0x00,0xb7,0xf3]
+[0x42,0x00,0xb7,0xf3]
+[0x81,0x00,0xb7,0xf3]
+[0xc2,0x00,0xb7,0xf3]
+
+# CHECK: vcvtm.s16.f16 d0, d1
+# CHECK: vcvtm.s16.f16 q0, q1
+# CHECK: vcvtm.u16.f16 d0, d1
+# CHECK: vcvtm.u16.f16 q0, q1
+[0x01,0x03,0xb7,0xf3]
+[0x42,0x03,0xb7,0xf3]
+[0x81,0x03,0xb7,0xf3]
+[0xc2,0x03,0xb7,0xf3]
+
+# CHECK: vcvtn.s16.f16 d0, d1
+# CHECK: vcvtn.s16.f16 q0, q1
+# CHECK: vcvtn.u16.f16 d0, d1
+# CHECK: vcvtn.u16.f16 q0, q1
+[0x01,0x01,0xb7,0xf3]
+[0x42,0x01,0xb7,0xf3]
+[0x81,0x01,0xb7,0xf3]
+[0xc2,0x01,0xb7,0xf3]
+
+# CHECK: vcvtp.s16.f16 d0, d1
+# CHECK: vcvtp.s16.f16 q0, q1
+# CHECK: vcvtp.u16.f16 d0, d1
+# CHECK: vcvtp.u16.f16 q0, q1
+[0x01,0x02,0xb7,0xf3]
+[0x42,0x02,0xb7,0xf3]
+[0x81,0x02,0xb7,0xf3]
+[0xc2,0x02,0xb7,0xf3]
+
+# CHECK: vcvt.s16.f16 d0, d1, #1
+# CHECK: vcvt.u16.f16 d0, d1, #2
+# CHECK: vcvt.f16.s16 d0, d1, #3
+# CHECK: vcvt.f16.u16 d0, d1, #4
+# CHECK: vcvt.s16.f16 q0, q1, #5
+# CHECK: vcvt.u16.f16 q0, q1, #6
+# CHECK: vcvt.f16.s16 q0, q1, #7
+# CHECK: vcvt.f16.u16 q0, q1, #8
+[0x11,0x0d,0xbf,0xf2]
+[0x11,0x0d,0xbe,0xf3]
+[0x11,0x0c,0xbd,0xf2]
+[0x11,0x0c,0xbc,0xf3]
+[0x52,0x0d,0xbb,0xf2]
+[0x52,0x0d,0xba,0xf3]
+[0x52,0x0c,0xb9,0xf2]
+[0x52,0x0c,0xb8,0xf3]
+
+# CHECK: vrinta.f16 d0, d1
+# CHECK: vrinta.f16 q0, q1
+[0x01,0x05,0xb6,0xf3]
+[0x42,0x05,0xb6,0xf3]
+
+# CHECK: vrintm.f16 d0, d1
+# CHECK: vrintm.f16 q0, q1
+[0x81,0x06,0xb6,0xf3]
+[0xc2,0x06,0xb6,0xf3]
+
+# CHECK: vrintn.f16 d0, d1
+# CHECK: vrintn.f16 q0, q1
+[0x01,0x04,0xb6,0xf3]
+[0x42,0x04,0xb6,0xf3]
+
+# CHECK: vrintp.f16 d0, d1
+# CHECK: vrintp.f16 q0, q1
+[0x81,0x07,0xb6,0xf3]
+[0xc2,0x07,0xb6,0xf3]
+
+# CHECK: vrintx.f16 d0, d1
+# CHECK: vrintx.f16 q0, q1
+[0x81,0x04,0xb6,0xf3]
+[0xc2,0x04,0xb6,0xf3]
+
+# CHECK: vrintz.f16 d0, d1
+# CHECK: vrintz.f16 q0, q1
+[0x81,0x05,0xb6,0xf3]
+[0xc2,0x05,0xb6,0xf3]
+
+# Existing VMOV (immediate, Advanced SIMD) instructions within the encoding
+# space of the new FP16 VCVT (between floating-point and fixed-point,
+# Advanced SIMD):
+#  4 -- Q
+#  2 -- VMOV op
+#        1 -- VCVT op
+#        2 -- VCVT FP size
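+# For example, [0x10,0x0c,0xc7,0xf2] still carries the VMOV immediate form
+# and must disassemble to "vmov.i32 d16, #0x70ff", not to a fixed-point VCVT.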
+[0x10,0x0c,0xc7,0xf2]
+[0x10,0x0d,0xc7,0xf2]
+[0x10,0x0e,0xc7,0xf2]
+[0x10,0x0f,0xc7,0xf2]
+[0x20,0x0c,0xc7,0xf2]
+[0x20,0x0d,0xc7,0xf2]
+[0x20,0x0e,0xc7,0xf2]
+[0x20,0x0f,0xc7,0xf2]
+[0x50,0x0c,0xc7,0xf2]
+[0x50,0x0d,0xc7,0xf2]
+[0x50,0x0e,0xc7,0xf2]
+[0x50,0x0f,0xc7,0xf2]
+[0x70,0x0c,0xc7,0xf2]
+[0x70,0x0d,0xc7,0xf2]
+[0x70,0x0e,0xc7,0xf2]
+[0x70,0x0f,0xc7,0xf2]
+# CHECK: vmov.i32        d16, #0x70ff
+# CHECK: vmov.i32        d16, #0x70ffff
+# CHECK: vmov.i8 d16, #0x70
+# CHECK: vmov.f32        d16, #1.000000e+00
+# CHECK: vmull.s8        q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0d,0xc7,0xf2]
+# CHECK: vmull.p8        q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0f,0xc7,0xf2]
+# CHECK: vmov.i32        q8, #0x70ff
+# CHECK: vmov.i32        q8, #0x70ffff
+# CHECK: vmov.i8 q8, #0x70
+# CHECK: vmov.f32        q8, #1.000000e+00
+# CHECK: vmvn.i32        q8, #0x70ff
+# CHECK: vmvn.i32        q8, #0x70ffff
+# CHECK: vmov.i64        q8, #0xffffff0000000
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x70,0x0f,0xc7,0xf2]
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt b/test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt
new file mode 100644 (file)
index 0000000..f7561bb
--- /dev/null
@@ -0,0 +1,274 @@
+# RUN: not llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=-fullfp16,+neon,+thumb-mode -show-encoding < %s 2>&1 | FileCheck %s
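+# These byte patterns are the Thumb encodings of the same FP16 NEON
+# instructions; with fullfp16 disabled, every one of them must fail to decode.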
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x02,0x0d]
+[0x12,0xef,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x02,0x0d]
+[0x32,0xef,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0d]
+[0x12,0xff,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x92,0xef,0x63,0x19]
+[0x9a,0xff,0x6e,0x89]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0d]
+[0x12,0xef,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x96,0xef,0x67,0x51]
+[0x9c,0xff,0x6f,0xa1]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0d]
+[0x32,0xef,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x96,0xef,0x67,0x55]
+[0x9c,0xff,0x6f,0xa5]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0c]
+[0x12,0xef,0x54,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0c]
+[0x32,0xef,0x54,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x13,0xef,0x04,0x2e]
+[0x16,0xef,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x25]
+[0xb5,0xff,0x46,0x45]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x13,0xff,0x04,0x2e]
+[0x16,0xff,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x83,0x24]
+[0xb5,0xff,0xc6,0x44]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x33,0xff,0x04,0x2e]
+[0x36,0xff,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x24]
+[0xb5,0xff,0x46,0x44]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x83,0x25]
+[0xb5,0xff,0xc6,0x45]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x26]
+[0xb5,0xff,0x46,0x46]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0e]
+[0x12,0xff,0x54,0x0e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x12,0x0e]
+[0x32,0xff,0x54,0x0e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x02,0x0d]
+[0x32,0xff,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x01,0x07]
+[0xb5,0xff,0x42,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x02,0x0f]
+[0x12,0xef,0x44,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x02,0x0f]
+[0x32,0xef,0x44,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0f]
+[0x12,0xff,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x12,0x0f]
+[0x32,0xff,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x02,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x02,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x02,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x05]
+[0xb7,0xff,0x42,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0f]
+[0x12,0xef,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x81,0x05]
+[0xb7,0xff,0xc2,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0f]
+[0x32,0xef,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x81,0x07]
+[0xb5,0xff,0xc2,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x07]
+[0xb7,0xff,0x81,0x07]
+[0xb7,0xff,0x01,0x06]
+[0xb7,0xff,0x81,0x06]
+[0xb7,0xff,0x42,0x07]
+[0xb7,0xff,0xc2,0x07]
+[0xb7,0xff,0x42,0x06]
+[0xb7,0xff,0xc2,0x06]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x00]
+[0xb7,0xff,0x42,0x00]
+[0xb7,0xff,0x81,0x00]
+[0xb7,0xff,0xc2,0x00]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x03]
+[0xb7,0xff,0x42,0x03]
+[0xb7,0xff,0x81,0x03]
+[0xb7,0xff,0xc2,0x03]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x01]
+[0xb7,0xff,0x42,0x01]
+[0xb7,0xff,0x81,0x01]
+[0xb7,0xff,0xc2,0x01]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x02]
+[0xb7,0xff,0x42,0x02]
+[0xb7,0xff,0x81,0x02]
+[0xb7,0xff,0xc2,0x02]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xef,0x11,0x0d]
+[0xbe,0xff,0x11,0x0d]
+[0xbd,0xef,0x11,0x0c]
+[0xbc,0xff,0x11,0x0c]
+[0xbb,0xef,0x52,0x0d]
+[0xba,0xff,0x52,0x0d]
+[0xb9,0xef,0x52,0x0c]
+[0xb8,0xff,0x52,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x01,0x05]
+[0xb6,0xff,0x42,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x06]
+[0xb6,0xff,0xc2,0x06]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x01,0x04]
+[0xb6,0xff,0x42,0x04]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x07]
+[0xb6,0xff,0xc2,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x04]
+[0xb6,0xff,0xc2,0x04]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x05]
+[0xb6,0xff,0xc2,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xc7,0xef,0x20,0x0d]
+[0xc7,0xef,0x20,0x0f]
+[0xc7,0xef,0x70,0x0f]
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt b/test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt
new file mode 100644 (file)
index 0000000..a776232
--- /dev/null
@@ -0,0 +1,309 @@
+# RUN: not llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=+fullfp16,+neon,+thumb-mode -show-encoding < %s 2>%t | FileCheck %s
+# RUN: FileCheck %s < %t --check-prefix=STDERR
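+# As in the ARM variant of this test, stdout is checked for the valid
+# encodings, and the stderr capture in %t for the trailing patterns that must
+# not decode.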
+
+# CHECK: vadd.f16 d0, d1, d2
+# CHECK: vadd.f16 q0, q1, q2
+[0x11,0xef,0x02,0x0d]
+[0x12,0xef,0x44,0x0d]
+
+# CHECK: vsub.f16 d0, d1, d2
+# CHECK: vsub.f16 q0, q1, q2
+[0x31,0xef,0x02,0x0d]
+[0x32,0xef,0x44,0x0d]
+
+# CHECK: vmul.f16 d0, d1, d2
+# CHECK: vmul.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0d]
+[0x12,0xff,0x54,0x0d]
+
+# CHECK: vmul.f16 d1, d2, d3[2]
+# CHECK: vmul.f16 q4, q5, d6[3]
+[0x92,0xef,0x63,0x19]
+[0x9a,0xff,0x6e,0x89]
+
+# CHECK: vmla.f16 d0, d1, d2
+# CHECK: vmla.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0d]
+[0x12,0xef,0x54,0x0d]
+
+# CHECK: vmla.f16 d5, d6, d7[2]
+# CHECK: vmla.f16 q5, q6, d7[3]
+[0x96,0xef,0x67,0x51]
+[0x9c,0xff,0x6f,0xa1]
+
+# CHECK: vmls.f16 d0, d1, d2
+# CHECK: vmls.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0d]
+[0x32,0xef,0x54,0x0d]
+
+# CHECK: vmls.f16 d5, d6, d7[2]
+# CHECK: vmls.f16 q5, q6, d7[3]
+[0x96,0xef,0x67,0x55]
+[0x9c,0xff,0x6f,0xa5]
+
+# CHECK: vfma.f16 d0, d1, d2
+# CHECK: vfma.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0c]
+[0x12,0xef,0x54,0x0c]
+
+# CHECK: vfms.f16 d0, d1, d2
+# CHECK: vfms.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0c]
+[0x32,0xef,0x54,0x0c]
+
+# CHECK: vceq.f16 d2, d3, d4
+# CHECK: vceq.f16 q2, q3, q4
+[0x13,0xef,0x04,0x2e]
+[0x16,0xef,0x48,0x4e]
+
+# CHECK: vceq.f16 d2, d3, #0
+# CHECK: vceq.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x25]
+[0xb5,0xff,0x46,0x45]
+
+# CHECK: vcge.f16 d2, d3, d4
+# CHECK: vcge.f16 q2, q3, q4
+[0x13,0xff,0x04,0x2e]
+[0x16,0xff,0x48,0x4e]
+
+# CHECK: vcge.f16 d2, d3, #0
+# CHECK: vcge.f16 q2, q3, #0
+[0xb5,0xff,0x83,0x24]
+[0xb5,0xff,0xc6,0x44]
+
+# CHECK: vcgt.f16 d2, d3, d4
+# CHECK: vcgt.f16 q2, q3, q4
+[0x33,0xff,0x04,0x2e]
+[0x36,0xff,0x48,0x4e]
+
+# CHECK: vcgt.f16 d2, d3, #0
+# CHECK: vcgt.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x24]
+[0xb5,0xff,0x46,0x44]
+
+# CHECK: vcle.f16 d2, d3, #0
+# CHECK: vcle.f16 q2, q3, #0
+[0xb5,0xff,0x83,0x25]
+[0xb5,0xff,0xc6,0x45]
+
+# CHECK: vclt.f16 d2, d3, #0
+# CHECK: vclt.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x26]
+[0xb5,0xff,0x46,0x46]
+
+# CHECK: vacge.f16 d0, d1, d2
+# CHECK: vacge.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0e]
+[0x12,0xff,0x54,0x0e]
+
+# CHECK: vacgt.f16 d0, d1, d2
+# CHECK: vacgt.f16 q0, q1, q2
+[0x31,0xff,0x12,0x0e]
+[0x32,0xff,0x54,0x0e]
+
+# CHECK: vabd.f16 d0, d1, d2
+# CHECK: vabd.f16 q0, q1, q2
+[0x31,0xff,0x02,0x0d]
+[0x32,0xff,0x44,0x0d]
+
+# CHECK: vabs.f16 d0, d1
+# CHECK: vabs.f16 q0, q1
+[0xb5,0xff,0x01,0x07]
+[0xb5,0xff,0x42,0x07]
+
+# CHECK: vmax.f16 d0, d1, d2
+# CHECK: vmax.f16 q0, q1, q2
+[0x11,0xef,0x02,0x0f]
+[0x12,0xef,0x44,0x0f]
+
+# CHECK: vmin.f16 d0, d1, d2
+# CHECK: vmin.f16 q0, q1, q2
+[0x31,0xef,0x02,0x0f]
+[0x32,0xef,0x44,0x0f]
+
+# CHECK: vmaxnm.f16 d0, d1, d2
+# CHECK: vmaxnm.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0f]
+[0x12,0xff,0x54,0x0f]
+
+# CHECK: vminnm.f16 d0, d1, d2
+# CHECK: vminnm.f16 q0, q1, q2
+[0x31,0xff,0x12,0x0f]
+[0x32,0xff,0x54,0x0f]
+
+# CHECK: vpadd.f16 d0, d1, d2
+[0x11,0xff,0x02,0x0d]
+
+# CHECK: vpmax.f16 d0, d1, d2
+[0x11,0xff,0x02,0x0f]
+
+# CHECK: vpmin.f16 d0, d1, d2
+[0x31,0xff,0x02,0x0f]
+
+# CHECK: vrecpe.f16 d0, d1
+# CHECK: vrecpe.f16 q0, q1
+[0xb7,0xff,0x01,0x05]
+[0xb7,0xff,0x42,0x05]
+
+# CHECK: vrecps.f16 d0, d1, d2
+# CHECK: vrecps.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0f]
+[0x12,0xef,0x54,0x0f]
+
+# CHECK: vrsqrte.f16 d0, d1
+# CHECK: vrsqrte.f16 q0, q1
+[0xb7,0xff,0x81,0x05]
+[0xb7,0xff,0xc2,0x05]
+
+# CHECK: vrsqrts.f16 d0, d1, d2
+# CHECK: vrsqrts.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0f]
+[0x32,0xef,0x54,0x0f]
+
+# CHECK: vneg.f16 d0, d1
+# CHECK: vneg.f16 q0, q1
+[0xb5,0xff,0x81,0x07]
+[0xb5,0xff,0xc2,0x07]
+
+# CHECK: vcvt.s16.f16 d0, d1
+# CHECK: vcvt.u16.f16 d0, d1
+# CHECK: vcvt.f16.s16 d0, d1
+# CHECK: vcvt.f16.u16 d0, d1
+# CHECK: vcvt.s16.f16 q0, q1
+# CHECK: vcvt.u16.f16 q0, q1
+# CHECK: vcvt.f16.s16 q0, q1
+# CHECK: vcvt.f16.u16 q0, q1
+[0xb7,0xff,0x01,0x07]
+[0xb7,0xff,0x81,0x07]
+[0xb7,0xff,0x01,0x06]
+[0xb7,0xff,0x81,0x06]
+[0xb7,0xff,0x42,0x07]
+[0xb7,0xff,0xc2,0x07]
+[0xb7,0xff,0x42,0x06]
+[0xb7,0xff,0xc2,0x06]
+
+# CHECK: vcvta.s16.f16 d0, d1
+# CHECK: vcvta.s16.f16 q0, q1
+# CHECK: vcvta.u16.f16 d0, d1
+# CHECK: vcvta.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x00]
+[0xb7,0xff,0x42,0x00]
+[0xb7,0xff,0x81,0x00]
+[0xb7,0xff,0xc2,0x00]
+
+# CHECK: vcvtm.s16.f16 d0, d1
+# CHECK: vcvtm.s16.f16 q0, q1
+# CHECK: vcvtm.u16.f16 d0, d1
+# CHECK: vcvtm.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x03]
+[0xb7,0xff,0x42,0x03]
+[0xb7,0xff,0x81,0x03]
+[0xb7,0xff,0xc2,0x03]
+
+# CHECK: vcvtn.s16.f16 d0, d1
+# CHECK: vcvtn.s16.f16 q0, q1
+# CHECK: vcvtn.u16.f16 d0, d1
+# CHECK: vcvtn.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x01]
+[0xb7,0xff,0x42,0x01]
+[0xb7,0xff,0x81,0x01]
+[0xb7,0xff,0xc2,0x01]
+
+# CHECK: vcvtp.s16.f16 d0, d1
+# CHECK: vcvtp.s16.f16 q0, q1
+# CHECK: vcvtp.u16.f16 d0, d1
+# CHECK: vcvtp.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x02]
+[0xb7,0xff,0x42,0x02]
+[0xb7,0xff,0x81,0x02]
+[0xb7,0xff,0xc2,0x02]
+
+# CHECK: vcvt.s16.f16 d0, d1, #1
+# CHECK: vcvt.u16.f16 d0, d1, #2
+# CHECK: vcvt.f16.s16 d0, d1, #3
+# CHECK: vcvt.f16.u16 d0, d1, #4
+# CHECK: vcvt.s16.f16 q0, q1, #5
+# CHECK: vcvt.u16.f16 q0, q1, #6
+# CHECK: vcvt.f16.s16 q0, q1, #7
+# CHECK: vcvt.f16.u16 q0, q1, #8
+[0xbf,0xef,0x11,0x0d]
+[0xbe,0xff,0x11,0x0d]
+[0xbd,0xef,0x11,0x0c]
+[0xbc,0xff,0x11,0x0c]
+[0xbb,0xef,0x52,0x0d]
+[0xba,0xff,0x52,0x0d]
+[0xb9,0xef,0x52,0x0c]
+[0xb8,0xff,0x52,0x0c]
+
+# CHECK: vrinta.f16 d0, d1
+# CHECK: vrinta.f16 q0, q1
+[0xb6,0xff,0x01,0x05]
+[0xb6,0xff,0x42,0x05]
+
+# CHECK: vrintm.f16 d0, d1
+# CHECK: vrintm.f16 q0, q1
+[0xb6,0xff,0x81,0x06]
+[0xb6,0xff,0xc2,0x06]
+
+# CHECK: vrintn.f16 d0, d1
+# CHECK: vrintn.f16 q0, q1
+[0xb6,0xff,0x01,0x04]
+[0xb6,0xff,0x42,0x04]
+
+# CHECK: vrintp.f16 d0, d1
+# CHECK: vrintp.f16 q0, q1
+[0xb6,0xff,0x81,0x07]
+[0xb6,0xff,0xc2,0x07]
+
+# CHECK: vrintx.f16 d0, d1
+# CHECK: vrintx.f16 q0, q1
+[0xb6,0xff,0x81,0x04]
+[0xb6,0xff,0xc2,0x04]
+
+# CHECK: vrintz.f16 d0, d1
+# CHECK: vrintz.f16 q0, q1
+[0xb6,0xff,0x81,0x05]
+[0xb6,0xff,0xc2,0x05]
+
+# Existing VMOV (immediate, Advanced SIMD) instructions within the encoding
+# space of the new FP16 VCVT (between floating-point and fixed-point,
+# Advanced SIMD):
+#  1 -- VCVT op
+#  2 -- VCVT FP size
+#            4 -- Q
+#            2 -- VMOV op
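+# For example, [0xc7,0xef,0x10,0x0c] still carries the VMOV immediate form
+# and must disassemble to "vmov.i32 d16, #0x70ff", not to a fixed-point VCVT.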
+[0xc7,0xef,0x10,0x0c]
+[0xc7,0xef,0x10,0x0d]
+[0xc7,0xef,0x10,0x0e]
+[0xc7,0xef,0x10,0x0f]
+[0xc7,0xef,0x20,0x0c]
+[0xc7,0xef,0x20,0x0d]
+[0xc7,0xef,0x20,0x0e]
+[0xc7,0xef,0x20,0x0f]
+[0xc7,0xef,0x50,0x0c]
+[0xc7,0xef,0x50,0x0d]
+[0xc7,0xef,0x50,0x0e]
+[0xc7,0xef,0x50,0x0f]
+[0xc7,0xef,0x70,0x0c]
+[0xc7,0xef,0x70,0x0d]
+[0xc7,0xef,0x70,0x0e]
+[0xc7,0xef,0x70,0x0f]
+# CHECK: vmov.i32        d16, #0x70ff
+# CHECK: vmov.i32        d16, #0x70ffff
+# CHECK: vmov.i8 d16, #0x70
+# CHECK: vmov.f32        d16, #1.000000e+00
+# CHECK: vmull.s8        q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0xc7,0xef,0x20,0x0d]
+# CHECK: vmull.p8        q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0xc7,0xef,0x20,0x0f]
+# CHECK: vmov.i32        q8, #0x70ff
+# CHECK: vmov.i32        q8, #0x70ffff
+# CHECK: vmov.i8 q8, #0x70
+# CHECK: vmov.f32        q8, #1.000000e+00
+# CHECK: vmvn.i32        q8, #0x70ff
+# CHECK: vmvn.i32        q8, #0x70ffff
+# CHECK: vmov.i64        q8, #0xffffff0000000
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0xc7,0xef,0x70,0x0f]