X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrNEON.td;h=32b8d67b05ad4c527e53dc7c4a1c2ad3d8e4958c;hb=5b3fca50a08865f0db55fc92ad1c037a04e12177;hp=c540d0ddd70aa5208af11da812a151f9159de7a9;hpb=06a6a300c5f7100e4665667c689369e078d2ad59;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c540d0ddd70..32b8d67b05a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,41 @@ def VecListFourQWordIndexed : Operand { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 8; +}]>; +def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 8; +}]>; +def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 4; +}]>; +def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 4; +}]>; +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -591,7 +626,7 @@ class VLD1D op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), @@ -599,7 +634,7 @@ class VLD1Q op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -620,7 +655,7 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -628,7 +663,7 @@ multiclass VLD1DWB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -639,7 +674,7 @@ multiclass VLD1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), @@ -647,7 +682,7 @@ multiclass VLD1QWB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -668,7 +703,7 @@ class VLD1D3 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -677,7 +712,7 @@ multiclass VLD1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -685,7 +720,7 @@ multiclass VLD1D3WB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -709,7 +744,7 @@ class VLD1D4 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -718,7 +753,7 @@ multiclass VLD1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -726,7 +761,7 @@ multiclass VLD1D4WB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -751,7 +786,7 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; @@ -775,7 +810,7 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -783,7 +818,7 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -818,7 +853,7 @@ class VLD3D op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -837,7 +872,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -877,7 +912,7 @@ class VLD4D op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -896,7 +931,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1545,14 +1580,14 @@ class VST1D op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1573,7 +1608,7 @@ multiclass VST1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), @@ -1582,7 +1617,7 @@ multiclass VST1DWB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1593,7 +1628,7 @@ multiclass VST1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), @@ -1602,7 +1637,7 @@ multiclass VST1QWB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1624,7 +1659,7 @@ class VST1D3 op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1633,7 +1668,7 @@ multiclass VST1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1642,7 +1677,7 @@ multiclass VST1D3WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1669,7 +1704,7 @@ class VST1D4 op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1678,7 +1713,7 @@ multiclass VST1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1687,7 +1722,7 @@ multiclass VST1D4WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1713,7 +1748,7 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; @@ -1737,7 +1772,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -1745,7 +1780,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1756,7 +1791,7 @@ multiclass VST2QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), @@ -1765,7 +1800,7 @@ multiclass VST2QWB op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1800,7 +1835,7 @@ class VST3D op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1819,7 +1854,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1859,7 +1894,7 @@ class VST4D op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1878,7 +1913,7 @@ class VST4DWB op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -1959,7 +1994,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, @@ -2002,7 +2037,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt, addrmode6oneL32> { @@ -2238,6 +2273,38 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; + +// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 +// load / store if it's legal. +def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), + (VLD1q64 addrmode6:$addr)>; +def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q64 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), + (VLD1q32 addrmode6:$addr)>; +def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q32 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments @@ -4197,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4210,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4225,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -4244,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. def vnotd : PatFrag<(ops node:$in), @@ -4421,10 +4511,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), @@ -4433,9 +4549,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -4758,12 +4900,38 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs", "f32", - v4f32, v4f32, int_arm_neon_vabs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; + +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + +def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; +def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, @@ -4829,10 +4997,10 @@ defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, int_arm_neon_vcnt>; + v8i8, v8i8, ctpop>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, int_arm_neon_vcnt>; + v16i8, v16i8, ctpop>; // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, @@ -4949,7 +5117,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -4972,7 +5141,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5083,14 +5261,23 @@ class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND op19_16, string OpcodeStr, string Dt, @@ -5322,8 +5509,9 @@ class VEXTd IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq @@ -5338,14 +5526,14 @@ class VEXTq } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5527,6 +5715,11 @@ def : N2VSPat; def : N2VSPat; def : N2VSPat; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -5594,29 +5787,34 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "i8"> = -// Pat<(v8i16 (extloadvi8 addrmode6oneL32:$addr)) -// (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, -// (f64 (IMPLICIT_DEF)), (i32 0)))>; +// Lengthen_Single<"8", "i16", "8"> = +// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) +// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; multiclass Lengthen_Single { + let AddedComplexity = 10 in { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("extloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLuv" # DestLanes # DestTy) - (VLD1LNd32 addrmode6oneL32:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLuv" # DestLanes # DestTy) - (VLD1LNd32 addrmode6oneL32:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLsv" # DestLanes # DestTy) - (VLD1LNd32 addrmode6oneL32:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + } } // extload, zextload and sextload for a lengthening load which only uses @@ -5648,8 +5846,8 @@ multiclass Lengthen_HalfSingle = -// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) // (EXTRACT_SUBREG (VMOVLuv4i32 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, // (f64 (IMPLICIT_DEF)), @@ -5663,19 +5861,19 @@ multiclass Lengthen_Double("extloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0))>; def _Z : Pat<(!cast("v" # DestLanes # DestTy) (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0))>; def _S : Pat<(!cast("v" # DestLanes # DestTy) (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0))>; } @@ -5684,9 +5882,9 @@ multiclass Lengthen_Double = -// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) // (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, // (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)), // dsub_0)>; @@ -5694,34 +5892,33 @@ multiclass Lengthen_HalfDouble { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("extloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0)>; def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0)>; def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0)>; } -defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 -defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32 -defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64 +defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 -defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 // Double lengthening - v4i8 -> v4i16 -> v4i32 @@ -5732,17 +5929,17 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode6oneL32:$addr)), +def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd32 addrmode6oneL32:$addr, + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode6oneL32:$addr)), +def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd32 addrmode6oneL32:$addr, + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 addrmode6oneL32:$addr)), +def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (VLD1LNd32 addrmode6oneL32:$addr, + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// @@ -6501,12 +6698,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",