diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7fdf618cbdc..43bd4c21dc3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,4 +1,4 @@
-//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -89,13 +89,13 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
   let ParserMatchClass = VecListOneDAsmOperand;
 }
 // Register list of two sequential D registers.
-def VecListTwoDAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoD";
+def VecListDPairAsmOperand : AsmOperandClass {
+  let Name = "VecListDPair";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
-  let ParserMatchClass = VecListTwoDAsmOperand;
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+  let ParserMatchClass = VecListDPairAsmOperand;
 }
 // Register list of three sequential D registers.
 def VecListThreeDAsmOperand : AsmOperandClass {
@@ -116,13 +116,13 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
   let ParserMatchClass = VecListFourDAsmOperand;
 }
 // Register list of two D registers spaced by 2 (two sequential Q registers).
-def VecListTwoQAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoQ";
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairSpaced";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
-  let ParserMatchClass = VecListTwoQAsmOperand;
+def VecListDPairSpaced : RegisterOperand<DPairSpc, "printVectorListTwoSpaced"> {
+  let ParserMatchClass = VecListDPairSpacedAsmOperand;
 }
 // Register list of three D registers spaced by 2 (three Q registers).
 def VecListThreeQAsmOperand : AsmOperandClass {
@@ -153,24 +153,65 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
   let ParserMatchClass = VecListOneDAllLanesAsmOperand;
 }
 // Register list of two D registers, with "all lanes" subscripting.
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoDAllLanes";
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairAllLanes";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
-  let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+def VecListDPairAllLanes : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
+  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
 }
 // Register list of two D registers spaced by 2 (two sequential Q registers).
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoQAllLanes";
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairSpacedAllLanes";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoQAllLanes : RegisterOperand<DPR, "printVectorListTwoSpacedAllLanes"> {
-  let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
+def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
+                                                "printVectorListTwoSpacedAllLanes"> {
+  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
 }
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListThreeDAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
+  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListThreeQAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+                                            "printVectorListThreeSpacedAllLanes"> {
+  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListFourDAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q regs).
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListFourQAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+                                           "printVectorListFourSpacedAllLanes"> {
+  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
+
 // Register list of one D register, with byte lane subscripting.
 def VecListOneDByteIndexAsmOperand : AsmOperandClass {
@@ -306,6 +347,92 @@ def VecListThreeQWordIndexed : Operand<i32> {
   let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
 }
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+  let Name = "VecListFourDByteIndexed";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+  let Name = "VecListFourDHWordIndexed";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+  let Name = "VecListFourDWordIndexed";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+  let Name = "VecListFourQHWordIndexed";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+  let Name = "VecListFourQWordIndexed";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                 (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 8;
+}]>;
+def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 4;
+}]>;
+def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 4;
+}]>;
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                 (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                    (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
 
 //===----------------------------------------------------------------------===//
 // NEON-specific DAG Nodes.
@@ -438,16 +565,16 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
 // Use VLDM to load a Q register as a D register pair.
 // This is a pseudo instruction that is expanded to VLDMD after reg alloc.
 def VLDMQIA
-  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
-                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
 
 // Use VSTM to store a Q register as a D register pair.
 // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
 def VSTMQIA
-  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
-                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;
 
 // Classes for VLD* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
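// [Editor's aside - illustration only, not part of the patch.]
// The *_alignedload/*_alignedstore fragments added above wrap a plain
// load/store SDNode in a C++ predicate on its alignment, so instruction
// selection patterns can pick an encoding based on how aligned the access
// is known to be. A hunk further down in this same patch uses them exactly
// that way, e.g.:
//
//   def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
//             (VLD1q64 addrmode6:$addr)>;
//   def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
//             (VST1q64 addrmode6:$addr, QPR:$value)>;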
@@ -499,15 +626,15 @@ class VLD1D op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -520,11 +647,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo; -def VLD1q16Pseudo : VLDQPseudo; -def VLD1q32Pseudo : VLDQPseudo; -def VLD1q64Pseudo : VLDQPseudo; - // ...with address register writeback: multiclass VLD1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -533,35 +655,31 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VLD1QWB op7_4, string Dt> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -574,15 +692,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q8PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q16PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q32PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q64PseudoWB_register : VLDQWBregisterPseudo; - // ...with 3 registers class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), @@ -590,7 +699,7 @@ class VLD1D3 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -599,16 +708,14 @@ multiclass VLD1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -631,7 +738,7 @@ class VLD1D4 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -640,16 +747,14 @@ multiclass VLD1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -673,21 +778,17 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; -def VLD2d8Pseudo : VLDQPseudo; -def VLD2d16Pseudo : VLDQPseudo; -def VLD2d32Pseudo : VLDQPseudo; - def VLD2q8Pseudo : VLDQQPseudo; def VLD2q16Pseudo : VLDQQPseudo; def VLD2q32Pseudo : VLDQQPseudo; @@ -701,34 +802,25 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d8PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d16PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d32PseudoWB_register : VLDQWBregisterPseudo; - def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; @@ -737,12 +829,12 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; // ...with double-spaced registers -def 
VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> @@ -751,7 +843,7 @@ class VLD3D op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -770,7 +862,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -810,7 +902,7 @@ class VLD4D op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -829,7 +921,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1210,39 +1302,32 @@ class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -class VLD1QDUPPseudo : VLDQPseudo { - let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; -} - def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; -def VLD1DUPq8Pseudo : VLD1QDUPPseudo; -def VLD1DUPq16Pseudo : VLD1QDUPPseudo; -def VLD1DUPq32Pseudo : VLD1QDUPPseudo; - def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32Pseudo addrmode6:$addr)>; - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -class VLD1QDUP op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), +class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let 
DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; + +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ -1253,7 +1338,6 @@ multiclass VLD1DUPWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), @@ -1262,28 +1346,25 @@ multiclass VLD1DUPWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1295,13 +1376,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo; -def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo; -def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo; - // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP op7_4, string Dt, RegisterOperand VdTy> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), @@ -1312,18 +1386,14 @@ class VLD2DUP op7_4, string Dt, RegisterOperand VdTy> let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; - -def VLD2DUPd8Pseudo : VLDQPseudo; -def VLD2DUPd16Pseudo : VLDQPseudo; -def VLD2DUPd32Pseudo : VLDQPseudo; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; -// ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; +// ...with 
double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // ...with address register writeback: multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { @@ -1335,7 +1405,6 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -1344,24 +1413,16 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; - -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo ; -def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo; -def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo ; -def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo; -def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo ; -def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> @@ -1382,9 +1443,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo; def VLD3DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB op7_4, string Dt> @@ -1400,9 +1461,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -1428,9 +1489,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo; def VLD4DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = 
Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB op7_4, string Dt> @@ -1447,9 +1508,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -1503,14 +1564,14 @@ class VST1D op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1523,11 +1584,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo; -def VST1q16Pseudo : VSTQPseudo; -def VST1q32Pseudo : VSTQPseudo; -def VST1q64Pseudo : VSTQPseudo; - // ...with address register writeback: multiclass VST1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), @@ -1536,8 +1592,7 @@ multiclass VST1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), @@ -1545,28 +1600,25 @@ multiclass VST1DWB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VST1QWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1580,15 +1632,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q8PseudoWB_register : VSTQWBregisterPseudo; -def VST1q16PseudoWB_register : VSTQWBregisterPseudo; -def VST1q32PseudoWB_register : VSTQWBregisterPseudo; -def VST1q64PseudoWB_register : VSTQWBregisterPseudo; - // ...with 3 registers class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -1596,7 +1639,7 @@ class VST1D3 op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1605,8 +1648,7 @@ multiclass VST1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), @@ -1614,8 +1656,7 @@ multiclass VST1D3WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1641,7 +1682,7 @@ class VST1D4 op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1650,8 +1691,7 @@ multiclass VST1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1659,8 +1699,7 @@ multiclass VST1D4WB op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1685,21 +1724,17 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; -def VST2d8Pseudo : VSTQPseudo; -def VST2d16Pseudo : VSTQPseudo; -def VST2d32Pseudo : VSTQPseudo; - def VST2q8Pseudo : VSTQQPseudo; def VST2q16Pseudo : VSTQQPseudo; def VST2q32Pseudo : VSTQQPseudo; @@ -1713,16 +1748,14 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } multiclass VST2QWB op7_4, string Dt> { @@ -1732,8 +1765,7 @@ multiclass VST2QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1741,26 +1773,18 @@ multiclass VST2QWB op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d16PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d32PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d8PseudoWB_register : VSTQWBregisterPseudo; -def VST2d16PseudoWB_register : VSTQWBregisterPseudo; -def VST2d32PseudoWB_register : VSTQWBregisterPseudo; - def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; @@ -1769,12 +1793,12 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; // VST3 : Vector Store (multiple 3-element structures) class VST3D op11_8, bits<4> op7_4, string Dt> @@ -1783,7 +1807,7 @@ class VST3D op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1802,7 +1826,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1842,7 +1866,7 @@ class VST4D op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1861,7 +1885,7 @@ class VST4DWB op11_8, 
bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -1921,20 +1945,11 @@ class VSTQQQQLNWBPseudo // VST1LN : Vector Store (single element from one lane) class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1945,16 +1960,17 @@ class VST1QLNPseudo } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -1970,14 +1986,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo @@ -1987,16 +2003,16 @@ class VST1QLNWBPseudo } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -2229,6 +2245,38 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 
+ (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; + +// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 +// load / store if it's legal. +def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), + (VLD1q64 addrmode6:$addr)>; +def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q64 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), + (VLD1q32 addrmode6:$addr)>; +def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q32 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments @@ -2291,18 +2339,52 @@ class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; +// Same as above, but not predicated. +class N2VDIntnp op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; + +class N2VQIntnp op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + +// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). +class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations. 
class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2316,7 +2398,7 @@ class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyD, ValueType TyQ, Intrinsic IntOp> + ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> : N2V; @@ -2334,7 +2416,7 @@ class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N2V; @@ -2359,6 +2441,8 @@ class N3VD op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } // Same as N3VD but no data type. @@ -2370,6 +2454,8 @@ class N3VDX op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2382,6 +2468,8 @@ class N3VDSL op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, @@ -2392,6 +2480,8 @@ class N3VDSL16 op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } @@ -2402,6 +2492,8 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQX op21_20, bits<4> op11_8, bit op4, @@ -2411,6 +2503,8 @@ class N3VQX op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, @@ -2423,6 +2517,8 @@ class N3VQSL op21_20, bits<4> op11_8, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, @@ -2434,21 +2530,35 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2458,8 +2568,9 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2470,26 +2581,52 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } class N3VQInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + +// Same as N3VQIntnp but with Vd as a src register. 
+class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2501,7 +2638,7 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2513,11 +2650,12 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } @@ -2597,7 +2735,7 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Neon Intrinsic-Op instructions (VABA): double- and quad-register. class N3VDIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; class N3VQIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, // Long Intrinsic-Op vector operations with explicit extend (VABAL). class N3VLIntExtOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // a quad-register and is also used as the first source operand register. 
class N3VLInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>; class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, // Narrowing 3-register intrinsics. class N3VNInt op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, - Intrinsic IntOp, bit Commutable> + SDPatternOperator IntOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2771,7 +2910,7 @@ class N3VLExt op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics with explicit extend (VABDL). class N3VLIntExt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics. class N3VLInt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } + +// Same as above, but not predicated. +class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, bit op4, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), (TyQ (ExtOp (TyD DPR:$Vm)))))]> { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2828,14 +2980,14 @@ class N3VW op21_20, bits<4> op11_8, bit op4, class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; @@ -2846,7 +2998,7 @@ class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDSh op11_8, bit op7, bit op4, Format f, InstrItinClass itin, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> @@ -2876,6 +3029,7 @@ class N2VQSh op11_8, bit op7, bit op4, (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; +} // Long shift by immediate. class N2VLSh op11_8, bit op7, bit op6, bit op4, @@ -2899,6 +3053,7 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShAdd op11_8, bit op7, bit op4, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> @@ -2915,9 +3070,11 @@ class N2VQShAdd op11_8, bit op7, bit op4, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (add QPR:$src1, (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; +} // Shift by immediate and insert, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShIns op11_8, bit op7, bit op4, Operand ImmTy, Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> @@ -2932,19 +3089,20 @@ class N2VQShIns op11_8, bit op7, bit op4, (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; +} // Convert, with fractional bits immediate, // both double- and quad-register. class N2VCvtD op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. 
def v8i8 : N2VDInt; @@ -3055,7 +3213,7 @@ multiclass N2VN_HSD op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v8i8 : N2VNInt; @@ -3143,7 +3301,7 @@ multiclass N3VInt_HS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { // 64-bit vector types. def v4i16 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { // 64-bit vector types. def v4i16 : N3VDIntSh op11_8, bit op4, Format f, multiclass N3VIntSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, @@ -3201,7 +3359,7 @@ multiclass N3VInt_QHS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_HS { def v8i8 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_HSSh { def v8i8 : N3VDIntSh op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_QHS { def v1i64 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_QHSSh { def v1i64 : N3VDIntSh op11_8, bit op4, Format f, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { def v8i8 : N3VNInt; @@ -3321,7 +3479,7 @@ multiclass N3VLExt_QHS op11_8, bit op4, multiclass N3VLInt_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { def v4i32 : N3VLInt; @@ -3332,7 +3490,7 @@ multiclass N3VLInt_HS op11_8, bit op4, multiclass N3VLIntSL_HS op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v4i16 : N3VLIntSL16; def v2i32 : N3VLIntSL op11_8, multiclass N3VLInt_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VLInt_HS { def v8i16 : N3VLInt op11_8, bit op4, // ....with explicit extend (VABDL). 
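// For reference (illustrative sketch; the exact pattern body may differ in
// this revision): "explicit extend" means the selection pattern wraps the
// D-register intrinsic result in a separate extend node, roughly
//   [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))))]
// so vabdl can be selected from a zext/sext of a vabd intrinsic result.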
multiclass N3VLIntExt_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { def v8i16 : N3VLIntExt; @@ -3427,7 +3585,7 @@ multiclass N3VMulOpSL_HS op11_8, // element sizes of 8, 16 and 32 bits: multiclass N3VIntOp_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp, + string OpcodeStr, string Dt, SDPatternOperator IntOp, SDNode OpNode> { // 64-bit vector types. def v8i8 : N3VDIntOp op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3; @@ -3497,7 +3655,7 @@ multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i32 : N3VLInt3; def v2i64 : N3VLInt3 op11_8, bit op4, } multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VLInt3SL16; def v2i32 : N3VLInt3SL op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> + string OpcodeStr, string Dt, SDPatternOperator IntOp> : N3VLInt3_HS { def v8i16 : N3VLInt3; @@ -3524,7 +3682,7 @@ multiclass N3VLInt3_QHS op11_8, bit op4, // ....with explicit extend (VABAL). multiclass N3VLIntExtOp_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { def v8i16 : N3VLIntExtOp; @@ -3541,7 +3699,7 @@ multiclass N3VLIntExtOp_QHS op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt; @@ -3564,7 +3722,7 @@ multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt2; @@ -3625,7 +3783,7 @@ multiclass N2VShL_QHSD op11_8, bit op4, } multiclass N2VShR_QHSD op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. 
def v8i8 : N2VDSh { @@ -3859,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -3902,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -3947,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -3969,10 +4150,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4019,18 
+4200,37 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4076,26 +4276,58 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; - + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
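// Note on operand order (illustrative): the IR-level node computes
// (fma $Vn, $Vm, $acc) = $Vn * $Vm + $acc, while the VFMA/VFMS instructions
// take the accumulator as the tied first operand, so the patterns below swap
// the operands, e.g.
//   (fma DPR:$Vn, DPR:$Vm, DPR:$src1) --> (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)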
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. @@ -4129,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Comparisons. 
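// With TwoOperandAliasConstraint = "$Vm = $Vd" the assembler also accepts a
// two-operand form of the compare-with-zero instructions below, tying the
// omitted register to the destination (illustrative, assuming the usual
// alias expansion):
//   vceq.i32 d0, #0   ==>   vceq.i32 d0, d0, #0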
// VCEQ : Vector Compare Equal @@ -4145,6 +4383,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4158,10 +4397,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4173,10 +4414,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -4192,6 +4435,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. 
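// For reference (illustrative sketch; the exact body may differ in this
// revision): vnotd/vnotq below are PatFrags expressing bitwise NOT as an XOR
// with an all-ones vector, approximately
//   def vnotd : PatFrag<(ops node:$in),
//                       (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;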
def vnotd : PatFrag<(ops node:$in), @@ -4256,6 +4517,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, // VBIC : Vector Bitwise Bit Clear (AND NOT) +let TwoOperandAliasConstraint = "$Vn = $Vd" in { def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbic", "$Vd, $Vn, $Vm", "", @@ -4266,6 +4528,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), "vbic", "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>; +} def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), @@ -4367,10 +4630,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), @@ -4379,9 +4668,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -4463,6 +4778,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = 
"NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4477,6 +4804,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add @@ -4597,8 +4936,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4632,8 +4973,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4700,12 +5043,38 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs", "f32", - v4f32, v4f32, int_arm_neon_vabs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; + +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 
(add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + +def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; +def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, @@ -4767,30 +5136,32 @@ defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", - int_arm_neon_vclz>; + ctlz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, int_arm_neon_vcnt>; + v8i8, v8i8, ctpop>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, int_arm_neon_vcnt>; + v16i8, v16i8, ctpop>; // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; // Vector Move Operations. 
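// A register-to-register "vmov" has no NEON encoding of its own; the aliases
// below assemble it as VORR with both source operands equal (illustrative):
//   vmov d0, d1   ==>   vorr d0, d1, d1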
// VMOV : Vector Move (Register) -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -4889,7 +5260,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -4912,7 +5284,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5023,14 +5404,23 @@ class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 
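// (Illustrative: on such cores the splat is built from two GPR writes
//  instead, so "vdup.32 d0, r0" is emitted as the VMOVDRR form
//  "vmov d0, r0, r0".)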
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND op19_16, string OpcodeStr, string Dt, @@ -5139,6 +5529,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", @@ -5162,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + + // VCVT : Vector Convert Between Half-Precision and Single-Precision. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, IIC_VUNAQ, "vcvt", "f16.f32", @@ -5253,14 +5682,18 @@ def : AlignedVEXTq; // VEXT : Vector Extract + +// All of these have a two-operand InstAlias. 
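// With TwoOperandAliasConstraint = "$Vn = $Vd" the assembler accepts, e.g.
// (illustrative):
//   vext.8 d0, d1, #3   ==>   vext.8 d0, d0, d1, #3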
+let TwoOperandAliasConstraint = "$Vn = $Vd" in { class VEXTd : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq @@ -5272,16 +5705,17 @@ class VEXTq bits<4> index; let Inst{11-8} = index{3-0}; } +} def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5323,7 +5757,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -5333,7 +5769,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
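// (Rationale, illustrative: zipping or unzipping two 32-bit lanes per
//  D register is the same permutation vtrn.32 performs,
//    {a0,a1},{b0,b1} -> {a0,b0},{a1,b1},
//  so no separate encoding is needed.)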
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -5351,7 +5789,7 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), @@ -5364,8 +5802,6 @@ def VTBL4 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -5381,7 +5817,7 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), @@ -5396,9 +5832,6 @@ def VTBX4 "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; @@ -5407,6 +5840,77 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : NEONInstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : NEONInstAlias(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES + : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op op27_23, bits<2> op21_20, 
SDPatternOperator Int> + : N3VQInt3np, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -5448,13 +5952,13 @@ def : N3VSPat; def : N3VSPat; def : N3VSPat; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEONVFP4, UseNEONForFP]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEONVFP4, UseNEONForFP]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat; def : N2VSPat; def : N3VSPat; @@ -5464,6 +5968,11 @@ def : N2VSPat; def : N2VSPat; def : N2VSPat; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -5531,6 +6040,160 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. 
Example: +// Lengthen_Single<"8", "i16", "8"> = +// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) +// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; +multiclass Lengthen_Single { + let AddedComplexity = 10 in { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLsv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + } +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. Example: +// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = +// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)>; +multiclass Lengthen_HalfSingle { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length. +// +// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), +// (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_Double { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length, but which ends up only +// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
+// +// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_HalfDouble { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; +} + +defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 + +defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 +defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; +// v2i8 -> v2i16 -> v2i32 +defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; +// v2i16 -> v2i32 -> v2i64 +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; + +// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 +def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases @@ -5541,69 +6204,6 @@ def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; - -// VADD two-operand aliases. 
-def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSUB two-operand aliases. -def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VADDW two-operand aliases. -def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", - (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", - (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", - (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", - (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", - (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", - (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; - // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; @@ -5622,23 +6222,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... 
two-operand aliases -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", @@ -5652,177 +6235,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VMUL two-operand aliases. -def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", - (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", - (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", - (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", - (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", - (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", - (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", - (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", - (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", - (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", - (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", - (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", - (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", - (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", - (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", - (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", - (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -// VQADD (register) two-operand aliases. 
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHL (immediate) two-operand aliases. -def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; - -// VSHL (register) two-operand aliases. 
-def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHL (immediate) two-operand aliases. -def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
 // VLD1 single-lane pseudo-instructions. These need special handling for
 // the lane index that an InstAlias can't handle, so we use these instead.
 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
@@ -5985,6 +6397,64 @@ def VST2LNqWB_register_Asm_32 :
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
 
+// VLD3 all-lanes pseudo-instructions. These need special handling for the
+// "all lanes" subscripting that an InstAlias can't handle, so we use these
+// instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+
 // VLD3 single-lane pseudo-instructions. These need special handling for
 // the lane index that an InstAlias can't handle, so we use these instead.
@@ -6203,6 +6673,114 @@ def VST3qWB_register_Asm_32 :
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
 
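// Illustrative note: an all-lanes load such as
// "vld3.8 {d0[], d1[], d2[]}, [r0]" reads one 3-element structure and
// replicates each element across every lane of its destination register.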
+// VLD4 all-lanes pseudo-instructions. These need special handling for the
+// "all lanes" subscripting that an InstAlias can't handle, so we use these
+// instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+                    rGPR:$Rm, pred:$p)>;
+
+
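// Illustrative note: with writeback, e.g.
// "vld4.16 {d0[], d1[], d2[], d3[]}, [r1]!", one 4 x 16-bit structure is
// replicated across d0-d3 and r1 is advanced by the 8 bytes transferred.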
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+
 // VLD4 multiple structure pseudo-instructions. These need special handling for
@@ -6264,6 +6842,55 @@ def VLD4qWB_register_Asm_32 :
                  (ins VecListFourQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
 
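// Illustrative note: e.g. "vld4.8 {d0[5], d1[5], d2[5], d3[5]}, [r2]" loads
// one byte into lane 5 of each destination register; the parser expands the
// pseudo once the lane index is known, since an InstAlias cannot encode it.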
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+                 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+                 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+                 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+                 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+                 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+
 // VST4 multiple structure pseudo-instructions. These need special handling for
 // the vector operands that the normal instructions don't yet model.
@@ -6324,12 +6951,17 @@ def VST4qWB_register_Asm_32 :
                  (ins VecListFourQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
 
-// VMOV takes an optional datatype suffix
+// VMOV/VMVN take an optional datatype suffix
 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                           (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                           (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
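// Illustrative note: the suffix is accepted but does not change the
// encoding, e.g. "vmov.i32 q0, q1" matches the same VORR as plain
// "vmov q0, q1", and "vmvn.s16 d0, d1" the same VMVN as "vmvn d0, d1".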
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", @@ -6394,112 +7026,26 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -// Two-operand variants for VEXT -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; - -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", - (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; - -// Two-operand variants for VQDMULH -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VMAX. -def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", - (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", - (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", - (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", - (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", - (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", - (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", - (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", - (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", - (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", - (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", - (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", - (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", - (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", - (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VMIN. 
-def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", - (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", - (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", - (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", - (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", - (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", - (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", - (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", - (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", - (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", - (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", - (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", - (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", - (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", - (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VPADD. -def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm", - (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm", - (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm", - (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", - (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + // "vmov Rd, #-imm" can be handled via "vmvn". def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;