"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+// Same as above, but not predicated.
+class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+
+class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
+
+class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
imm:$lane)))))]> {
let isCommutable = 0;
}
+
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
+
+class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
+ Dt, ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+ (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
- int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
+def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfd DPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfq QPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+
+
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", NEONvmulls, 1>;
-defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", NEONvmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
- v8i16, v8i8, int_arm_neon_vmullp, 1>;
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "NEONData" in {
+ defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+ defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+ def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+ def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+ "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+ Requires<[HasV8, HasCrypto]>;
+}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlal", "s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+ "vqdmlal", "s", null_frag>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+ "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
- int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
+def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
// Vector Comparisons.
// VCEQ : Vector Compare Equal
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", NEONvceqz>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", NEONvclez>;
+}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", NEONvcltz>;
+}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
"vmax", "f32",
v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+// VMAXNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+}
+
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmin", "f32",
v4f32, v4f32, int_arm_neon_vmins, 1>;
+// VMINNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v2f32, v2f32, int_arm_neon_vminnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v4f32, v4f32, int_arm_neon_vminnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+}
+
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs", "f32",
- v4f32, v4f32, int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+ (v2i32 (bitconvert (v8i8 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 7))))))),
+ (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+ (v2i32 (bitconvert (v4i16 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 15))))))),
+ (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+ (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+ (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+ (v4i32 (bitconvert (v16i8 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 7))))))),
+ (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+ (v4i32 (bitconvert (v8i16 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 15))))))),
+ (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+ (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+ (VABSv4i32 QPR:$src)>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
}
-
-def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
- (ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
- IIC_VMOVISL, "",
- [(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
- GPR:$R, imm:$lane))]>;
-def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
- (ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
- IIC_VMOVISL, "",
- [(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
- GPR:$R, imm:$lane))]>;
}
-
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
- (v4i32 (INSERT_SUBREG QPR:$src1,
- GPR:$src2,
- (SSubReg_f32_reg imm:$lane)))>;
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
+// VCVT{A, N, P, M}
+multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
+ SDPatternOperator IntU> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ }
+}
+
+defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
+defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
+defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
+defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
+
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+ (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+ (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+ (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+ (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+ (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+ (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+ (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+ (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
IIC_VUNAQ, "vcvt", "f16.f32",
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
(Ty DPR:$Vm), imm:$index)))]> {
- bits<4> index;
- let Inst{11-8} = index{3-0};
+ bits<3> index;
+ let Inst{11} = 0b0;
+ let Inst{10-8} = index{2-0};
}
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
}
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
- let Inst{11-8} = index{3-0};
+ let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
- let Inst{11-9} = index{2-0};
+ let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
- let Inst{11-10} = index{1-0};
+ let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+// VRINT : Vector Rounding
+multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ }
+
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+}
+
+defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "v8Crypto" in {
+ class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+ : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+ Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
-// VMOV takes an optional datatype suffix
+// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",