X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrNEON.td;h=32b8d67b05ad4c527e53dc7c4a1c2ad3d8e4958c;hb=10ddc4d7f232507933c266180d0052f12e65c4ab;hp=97f74d8c712b150c260c35fc91eab07fd02dbe8e;hpb=d10eab0a95dcfff6390cc73b50ca07fd8b98b0bc;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 97f74d8c712..32b8d67b05a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -626,7 +626,7 @@ class VLD1D op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), @@ -634,7 +634,7 @@ class VLD1Q op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -655,7 +655,7 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -663,7 +663,7 @@ multiclass VLD1DWB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -674,7 +674,7 @@ multiclass VLD1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), @@ -682,7 +682,7 @@ multiclass VLD1QWB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -703,7 +703,7 @@ class VLD1D3 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -712,7 +712,7 @@ multiclass VLD1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -720,7 +720,7 @@ multiclass VLD1D3WB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -744,7 +744,7 @@ class VLD1D4 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -753,7 +753,7 @@ multiclass VLD1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -761,7 +761,7 @@ multiclass VLD1D4WB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -786,7 +786,7 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; @@ -810,7 +810,7 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -818,7 +818,7 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -853,7 +853,7 @@ class VLD3D op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -872,7 +872,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -912,7 +912,7 @@ class VLD4D op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -931,7 +931,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1580,14 +1580,14 @@ class VST1D op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1608,7 +1608,7 @@ multiclass VST1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), @@ -1617,7 +1617,7 @@ multiclass VST1DWB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1628,7 +1628,7 @@ multiclass VST1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), @@ -1637,7 +1637,7 @@ multiclass VST1QWB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1659,7 +1659,7 @@ class VST1D3 op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1668,7 +1668,7 @@ multiclass VST1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1677,7 +1677,7 @@ multiclass VST1D3WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1704,7 +1704,7 @@ class VST1D4 op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1713,7 +1713,7 @@ multiclass VST1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1722,7 +1722,7 @@ multiclass VST1D4WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1748,7 +1748,7 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; @@ -1772,7 +1772,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -1780,7 +1780,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1791,7 +1791,7 @@ multiclass VST2QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), @@ -1800,7 +1800,7 @@ multiclass VST2QWB op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1835,7 +1835,7 @@ class VST3D op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1854,7 +1854,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1894,7 +1894,7 @@ class VST4D op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1913,7 +1913,7 @@ class VST4DWB op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -4311,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. def vnotd : PatFrag<(ops node:$in), @@ -4488,10 +4511,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), @@ -4500,9 +4549,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -4825,12 +4900,38 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs", "f32", - v4f32, v4f32, int_arm_neon_vabs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; + +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + +def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; +def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, @@ -5016,7 +5117,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5039,7 +5141,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5078,23 +5189,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } - -def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), - GPR:$R, imm:$lane))]>; -def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), - GPR:$R, imm:$lane))]>; } - +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - GPR:$src2, - (SSubReg_f32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), @@ -5148,14 +5261,23 @@ class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND op19_16, string OpcodeStr, string Dt, @@ -5387,8 +5509,9 @@ class VEXTd IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq @@ -5403,14 +5526,14 @@ class VEXTq } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5592,6 +5715,11 @@ def : N2VSPat; def : N2VSPat; def : N2VSPat; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -5659,6 +5787,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: @@ -6566,12 +6698,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",