def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+// Type profile shared by the scalar FP max/min nodes: one f32 result and two
+// operands, each constrained to have the same type as that result.
+def SDTARMFMAX : SDTypeProfile<1, 2,
+                               [SDTCisVT<0, f32>,
+                                SDTCisSameAs<0, 1>,
+                                SDTCisSameAs<0, 2>]>;
+def NEONfmax   : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin   : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
+// VLD1 with a list of three D registers.  For disassembly only, so no
+// selection pattern is attached.
+class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b10, 0b0110, op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr),
+          IIC_VLD1, OpcodeStr, Dt,
+          "\\{$dst1, $dst2, $dst3\\}, $addr", "",
+          []>; // pattern intentionally left blank
+// VLD1 with a list of four D registers.  For disassembly only, so no
+// selection pattern is attached.
+class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b10, 0b0010, op7_4,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr),
+          IIC_VLD1, OpcodeStr, Dt,
+          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
+          []>; // pattern intentionally left blank
+
+// Three-register VLD1 records, one per element size (64-bit form disabled).
+def VLD1d8T  : VLD1D3<0b0000, "vld1", "8">;
+def VLD1d16T : VLD1D3<0b0100, "vld1", "16">;
+def VLD1d32T : VLD1D3<0b1000, "vld1", "32">;
+//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">;
+
+// Four-register VLD1 records, one per element size (64-bit form disabled).
+def VLD1d8Q  : VLD1D4<0b0000, "vld1", "8">;
+def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">;
+def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">;
+//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">;
+
+
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD2 : Vector Load (multiple 2-element structures)
def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
+// VLD2 into a double-spaced pair of D registers.  For disassembly only, hence
+// the empty pattern list.
+class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b10, 0b1001, op7_4,
+          (outs DPR:$dst1, DPR:$dst2),
+          (ins addrmode6:$addr),
+          IIC_VLD2, OpcodeStr, Dt,
+          "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+// Double-spaced VLD2 records, one per element size.
+def VLD2d8D  : VLD2Ddbl<0b0000, "vld2", "8">;
+def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">;
+def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">;
+
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq
+// VST1 with a list of three D registers.  For disassembly only, so no
+// selection pattern is attached.
+class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b00, 0b0110, op7_4,
+          (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
+          IIC_VST, OpcodeStr, Dt,
+          "\\{$src1, $src2, $src3\\}, $addr", "",
+          []>; // pattern intentionally left blank
+// VST1 with a list of four D registers.  For disassembly only, so no
+// selection pattern is attached.
+class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b00, 0b0010, op7_4,
+          (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST, OpcodeStr, Dt,
+          "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+          []>; // pattern intentionally left blank
+
+// Three-register VST1 records, one per element size (64-bit form disabled).
+def VST1d8T  : VST1D3<0b0000, "vst1", "8">;
+def VST1d16T : VST1D3<0b0100, "vst1", "16">;
+def VST1d32T : VST1D3<0b1000, "vst1", "32">;
+//def VST1d64T : VST1D3<0b1100, "vst1", "64">;
+
+// Four-register VST1 records, one per element size (64-bit form disabled).
+def VST1d8Q  : VST1D4<0b0000, "vst1", "8">;
+def VST1d16Q : VST1D4<0b0100, "vst1", "16">;
+def VST1d32Q : VST1D4<0b1000, "vst1", "32">;
+//def VST1d64Q : VST1D4<0b1100, "vst1", "64">;
+
+
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2 : Vector Store (multiple 2-element structures)
def VST2q16 : VST2Q<0b0100, "vst2", "16">;
def VST2q32 : VST2Q<0b1000, "vst2", "32">;
+// VST2 from a double-spaced pair of D registers.  For disassembly only, hence
+// the empty pattern list.
+class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+  : NLdSt<0, 0b00, 0b1001, op7_4,
+          (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
+          IIC_VST, OpcodeStr, Dt,
+          "\\{$src1, $src2\\}, $addr", "", []>;
+
+// Double-spaced VST2 records, one per element size.
+def VST2d8D  : VST2Ddbl<0b0000, "vst2", "8">;
+def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">;
+def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">;
+
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0100,op7_4, (outs),
(ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
-// Basic 2-register intrinsics: single-, double- and quad-register.
-class N2VSInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- OpcodeStr, Dt, "$dst, $src", "", []>;
+// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
// S = single int (32 bit) elements
// D = double int (64 bit) elements
+// Neon 2-register vector operations -- for disassembly only (every pattern list below is empty).
+
+// First with only element sizes of 8, 16 and 32 bits (plus one f32 variant per register width):
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm> {
+ // 64-bit vector types: DPR operands, sixth N2V argument is 0.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, // third argument 0b00 pairs with the "8" suffix
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, // third argument 0b01 pairs with the "16" suffix
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, // third argument 0b10 pairs with the "32" suffix
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, // same arguments as v2i32; Dt is fixed to "f32"
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1 (the only encoding difference from v2i32 set here)
+ }
+
+ // 128-bit vector types: QPR operands, sixth N2V argument is 1.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, // same arguments as v4i32; Dt is fixed to "f32"
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1 (the only encoding difference from v4i32 set here)
+ }
+}
+
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+// VCEQ with a literal #0 second operand -- for disassembly only.
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0,
+                         "vceq", "i", "$dst, $src, #0">;
+
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+// VCGE and VCLE with a literal #0 second operand -- for disassembly only.
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0,
+                         "vcge", "s", "$dst, $src, #0">;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0,
+                         "vcle", "s", "$dst, $src, #0">;
+
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+// VCGT and VCLT with a literal #0 second operand -- for disassembly only.
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0,
+                         "vcgt", "s", "$dst, $src, #0">;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0,
+                         "vclt", "s", "$dst, $src, #0">;
+
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
v2i32, v2f32, int_arm_neon_vacged, 0>;
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
+// Vector Swap, D-register and Q-register forms -- for disassembly only
+// (no itinerary, no selection pattern).
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+                 (outs DPR:$dst), (ins DPR:$src),
+                 NoItinerary, "vswp", "$dst, $src", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+                 (outs QPR:$dst), (ins QPR:$src),
+                 NoItinerary, "vswp", "$dst, $src", "", []>;
+
// Vector Move Operations.
// VMOV : Vector Move (Register)
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
(v16i8 (INSERT_SUBREG QPR:$src1,
- (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, // VSETLNi8 result is explicitly typed v8i8
(DSubReg_i8_reg imm:$lane))),
- GPR:$src2, (SubReg_i8_lane imm:$lane)),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
(DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
(v8i16 (INSERT_SUBREG QPR:$src1,
- (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, // VSETLNi16 result is explicitly typed v4i16
(DSubReg_i16_reg imm:$lane))),
- GPR:$src2, (SubReg_i16_lane imm:$lane)),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
(DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
(v4i32 (INSERT_SUBREG QPR:$src1,
- (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, // VSETLNi32 result is explicitly typed v2i32
(DSubReg_i32_reg imm:$lane))),
- GPR:$src2, (SubReg_i32_lane imm:$lane)),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
(DSubReg_i32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
: NEONFPPat<(ResTy (OpNode SPR:$a)),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0)),
+ (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), // Inst result is explicitly typed OpTy
+ SPR:$a, arm_ssubreg_0))), // $a is inserted at arm_ssubreg_0 of an IMPLICIT_DEF OpTy value
arm_ssubreg_0)>;
class N3VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0)),
+ (EXTRACT_SUBREG (v2f32 // Inst result is explicitly typed v2f32
+ (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0), // first operand: $a at arm_ssubreg_0 of an IMPLICIT_DEF v2f32
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0))), // second operand: $b at arm_ssubreg_0 of an IMPLICIT_DEF v2f32
arm_ssubreg_0)>;
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
-def VABSfd_sfp : N2VSInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD,
- "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vabs", "f32", "$dst, $src", "", []>; // empty pattern list; the N2VSPat on the next line maps fabs to this record
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
// Vector Negate used for single-precision FP
"vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
+// Vector Maximum used for single-precision FP.  The record itself has an
+// empty pattern list; the N3VSPat below maps the NEONfmax node onto it.
+let neverHasSideEffects = 1 in
+def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0,
+                     (outs DPR_VFP2:$dst),
+                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
+                     IIC_VBIND, "vmax", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmax, VMAXfd_sfp>;
+
+// Vector Minimum used for single-precision FP.  The record itself has an
+// empty pattern list; the N3VSPat below maps the NEONfmin node onto it.
+// Encoding: the third N3V argument (op21_20) must be 0b10 here.  Its high bit
+// (instruction bit 21) is what distinguishes VMIN from VMAX; with 0b00 this
+// record would carry exactly the same encoding as VMAXfd_sfp and would be
+// emitted as a vmax.
+let neverHasSideEffects = 1 in
+def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0,
+                     (outs DPR_VFP2:$dst),
+                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
+                     IIC_VBIND, "vmin", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmin, VMINfd_sfp>;
+
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",