X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrNEON.td;h=8158a11f8316956a2b47e540371af94fac7d22c7;hb=616471d4bfe4717fa86259ff4534703357b3b723;hp=0b58b898a978bcffb17f797f8c64274a85a40101;hpb=799ca9d1b7cfa8910ac27f8de4929bfbd278114d;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 0b58b898a97..8158a11f831 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1,4 +1,4 @@ -//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -39,6 +39,11 @@ def nImmVMOVI32 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } +def nImmVMOVI32Neg : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32NegAsmOperand; +} def nImmVMOVF32 : Operand { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; @@ -84,13 +89,13 @@ def VecListOneD : RegisterOperand { let ParserMatchClass = VecListOneDAsmOperand; } // Register list of two sequential D registers. -def VecListTwoDAsmOperand : AsmOperandClass { - let Name = "VecListTwoD"; +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoD : RegisterOperand { - let ParserMatchClass = VecListTwoDAsmOperand; +def VecListDPair : RegisterOperand { + let ParserMatchClass = VecListDPairAsmOperand; } // Register list of three sequential D registers. 
def VecListThreeDAsmOperand : AsmOperandClass { @@ -111,13 +116,31 @@ def VecListFourD : RegisterOperand { let ParserMatchClass = VecListFourDAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). -def VecListTwoQAsmOperand : AsmOperandClass { - let Name = "VecListTwoQ"; +def VecListDPairSpacedAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpaced"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpaced : RegisterOperand { + let ParserMatchClass = VecListDPairSpacedAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListThreeQAsmOperand : AsmOperandClass { + let Name = "VecListThreeQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQ : RegisterOperand { + let ParserMatchClass = VecListThreeQAsmOperand; +} +// Register list of four D registers spaced by 2 (four Q registers). +def VecListFourQAsmOperand : AsmOperandClass { + let Name = "VecListFourQ"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand { - let ParserMatchClass = VecListTwoQAsmOperand; +def VecListFourQ : RegisterOperand { + let ParserMatchClass = VecListFourQAsmOperand; } // Register list of one D register, with "all lanes" subscripting. @@ -130,14 +153,65 @@ def VecListOneDAllLanes : RegisterOperand { let ParserMatchClass = VecListOneDAllLanesAsmOperand; } // Register list of two D registers, with "all lanes" subscripting. 
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListTwoDAllLanes"; +def VecListDPairAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairAllLanes : RegisterOperand { + let ParserMatchClass = VecListDPairAllLanesAsmOperand; +} +// Register list of two D registers spaced by 2 (two sequential Q registers). +def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpacedAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpacedAllLanes : RegisterOperand { + let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; +} +// Register list of three D registers, with "all lanes" subscripting. +def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoDAllLanes : RegisterOperand { - let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +def VecListThreeDAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; } +// Register list of three D registers spaced by 2 (three sequential Q regs). +def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} +// Register list of four D registers, with "all lanes" subscripting. 
+def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} + // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { @@ -169,7 +243,8 @@ def VecListOneDWordIndexed : Operand { let ParserMatchClass = VecListOneDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } -// Register list of two D registers, with byte lane subscripting. + +// Register list of two D registers with byte lane subscripting. def VecListTwoDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDByteIndexed"; let ParserMethod = "parseVectorList"; @@ -199,6 +274,151 @@ def VecListTwoDWordIndexed : Operand { let ParserMatchClass = VecListTwoDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +// Register list of two Q registers with half-word lane subscripting. +def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQHWordIndexed : Operand { + let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQWordIndexed : Operand { + let ParserMatchClass = VecListTwoQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + +// Register list of three D registers with byte lane subscripting. +def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of four D registers with byte lane subscripting. +def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. +def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -331,16 +551,16 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. def VLDMQIA - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. 
def VSTMQIA - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; + [(store (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -395,7 +615,7 @@ class VLD1D op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; @@ -413,11 +633,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo; -def VLD1q16Pseudo : VLDQPseudo; -def VLD1q32Pseudo : VLDQPseudo; -def VLD1q64Pseudo : VLDQPseudo; - // ...with address register writeback: multiclass VLD1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -439,7 +654,7 @@ multiclass VLD1DWB op7_4, string Dt> { } } multiclass VLD1QWB op7_4, string Dt> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -448,7 +663,7 @@ multiclass VLD1QWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -467,15 +682,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -def 
VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q8PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q16PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q32PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q64PseudoWB_register : VLDQWBregisterPseudo; - // ...with 3 registers class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), @@ -569,18 +775,14 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; -def VLD2d8Pseudo : VLDQPseudo; -def VLD2d16Pseudo : VLDQPseudo; -def VLD2d32Pseudo : VLDQPseudo; - def VLD2q8Pseudo : VLDQQPseudo; def VLD2q16Pseudo : VLDQQPseudo; def VLD2q32Pseudo : VLDQQPseudo; @@ -607,21 +809,14 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; 
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d8PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d16PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d32PseudoWB_register : VLDQWBregisterPseudo; - def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; @@ -630,12 +825,12 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> @@ -819,12 +1014,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { } def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 
let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { let Inst{7} = lane{0}; - let Inst{5} = Rn{4}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNq8Pseudo : VLD1QLNPseudo; @@ -994,7 +1188,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; @@ -1005,7 +1199,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; @@ -1020,7 +1214,7 @@ class VLD4LN op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; } @@ -1031,7 +1225,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1044,7 +1238,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1072,7 +1266,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1084,7 +1278,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let 
Inst{5} = Rn{5}; } @@ -1104,39 +1298,32 @@ class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -class VLD1QDUPPseudo : VLDQPseudo { - let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; -} - def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; -def VLD1DUPq8Pseudo : VLD1QDUPPseudo; -def VLD1DUPq16Pseudo : VLD1QDUPPseudo; -def VLD1DUPq32Pseudo : VLD1QDUPPseudo; - def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32Pseudo addrmode6:$addr)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { - -class VLD1QDUP op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), +class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ 
-1161,7 +1348,7 @@ multiclass VLD1DUPWB op7_4, string Dt> { } multiclass VLD1QDUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -1171,7 +1358,7 @@ multiclass VLD1QDUPWB op7_4, string Dt> { let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1189,56 +1376,55 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo; -def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo; -def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo; - // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD2DUP op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), (ins addrmode6dup:$Rn), IIC_VLD2dup, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8Pseudo : VLDQPseudo; 
-def VLD2DUPd16Pseudo : VLDQPseudo; -def VLD2DUPd32Pseudo : VLDQPseudo; - -// ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">; +// ...with double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // ...with address register writeback: -class VLD2DUPWB op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; +multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; -def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; -def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; - -def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">; -def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">; -def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo; -def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo; -def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> @@ -1259,9 +1445,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo; def VLD3DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB op7_4, string Dt> @@ -1277,9 +1463,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : 
VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -1305,9 +1491,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo; def VLD4DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB op7_4, string Dt> @@ -1324,9 +1510,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -1358,6 +1544,15 @@ class VSTQQWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, "$addr.addr = $wb">; +class VSTQQWBfixedPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBregisterPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, 
rGPR:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; + class VSTQQQQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; class VSTQQQQWBPseudo @@ -1374,7 +1569,7 @@ class VST1D op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; @@ -1391,11 +1586,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo; -def VST1q16Pseudo : VSTQPseudo; -def VST1q32Pseudo : VSTQPseudo; -def VST1q64Pseudo : VSTQPseudo; - // ...with address register writeback: multiclass VST1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), @@ -1419,7 +1609,7 @@ multiclass VST1DWB op7_4, string Dt> { } multiclass VST1QWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1428,7 +1618,7 @@ multiclass VST1QWB op7_4, string Dt> { let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1448,15 +1638,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q8PseudoWB_register : VSTQWBregisterPseudo; -def VST1q16PseudoWB_register : VSTQWBregisterPseudo; -def VST1q32PseudoWB_register : VSTQWBregisterPseudo; -def VST1q64PseudoWB_register : VSTQWBregisterPseudo; - // ...with 3 registers class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -1556,18 +1737,14 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; -def VST2d8Pseudo : VSTQPseudo; -def VST2d16Pseudo : VSTQPseudo; -def VST2d32Pseudo : VSTQPseudo; - def VST2q8Pseudo : VSTQQPseudo; def VST2q16Pseudo : VSTQQPseudo; def VST2q32Pseudo : VSTQQPseudo; @@ -1614,35 +1791,28 @@ multiclass VST2QWB op7_4, string 
Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBPseudo; -def VST2d16PseudoWB_fixed : VSTQWBPseudo; -def VST2d32PseudoWB_fixed : VSTQWBPseudo; -def VST2d8PseudoWB_register : VSTQWBPseudo; -def VST2d16PseudoWB_register : VSTQWBPseudo; -def VST2d32PseudoWB_register : VSTQWBPseudo; - -def VST2q8PseudoWB_fixed : VSTQQWBPseudo; -def VST2q16PseudoWB_fixed : VSTQQWBPseudo; -def VST2q32PseudoWB_fixed : VSTQQWBPseudo; -def VST2q8PseudoWB_register : VSTQQWBPseudo; -def VST2q16PseudoWB_register : VSTQQWBPseudo; -def VST2q32PseudoWB_register : VSTQQWBPseudo; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, 
"32", VecListDPairSpaced, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; // VST3 : Vector Store (multiple 3-element structures) class VST3D op11_8, bits<4> op7_4, string Dt> @@ -1789,20 +1959,11 @@ class VSTQQQQLNWBPseudo // VST1LN : Vector Store (single element from one lane) class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1813,16 +1974,17 @@ class VST1QLNPseudo } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ 
-1838,14 +2000,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo @@ -1855,16 +2017,16 @@ class VST1QLNWBPseudo } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -2097,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 
addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments @@ -2159,14 +2334,14 @@ class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; @@ -2184,7 +2359,7 @@ class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyD, ValueType TyQ, Intrinsic IntOp> + ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> : N2V; @@ -2202,7 +2377,7 @@ class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N2V; @@ -2227,6 +2402,8 @@ class N3VD op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } // Same as N3VD but no data type. 
@@ -2238,6 +2415,8 @@ class N3VDX op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2250,6 +2429,8 @@ class N3VDSL op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, @@ -2260,6 +2441,8 @@ class N3VDSL16 op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } @@ -2270,6 +2453,8 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQX op21_20, bits<4> op11_8, bit op4, @@ -2279,6 +2464,8 @@ class N3VQX op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, @@ -2291,6 +2478,8 @@ class N3VQSL op21_20, bits<4> op11_8, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, @@ -2302,21 +2491,25 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2327,7 +2520,7 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2338,26 +2531,29 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType 
OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } class N3VQInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2369,7 +2565,7 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2381,11 +2577,12 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } @@ -2465,7 +2662,7 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Neon Intrinsic-Op instructions (VABA): double- and quad-register. 
class N3VDIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; class N3VQIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, // Long Intrinsic-Op vector operations with explicit extend (VABAL). class N3VLIntExtOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // a quad-register and is also used as the first source operand register. 
class N3VLInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>; class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, // Narrowing 3-register intrinsics. class N3VNInt op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, - Intrinsic IntOp, bit Commutable> + SDPatternOperator IntOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics with explicit extend (VABDL). class N3VLIntExt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics. 
class N3VLInt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, } class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, bit op4, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), (TyQ (ExtOp (TyD DPR:$Vm)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2696,14 +2895,14 @@ class N3VW op21_20, bits<4> op11_8, bit op4, class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; @@ -2714,7 +2913,7 @@ class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, class 
N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDSh op11_8, bit op7, bit op4, Format f, InstrItinClass itin, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> @@ -2744,6 +2944,7 @@ class N2VQSh op11_8, bit op7, bit op4, (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; +} // Long shift by immediate. class N2VLSh op11_8, bit op7, bit op6, bit op4, @@ -2767,6 +2968,7 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShAdd op11_8, bit op7, bit op4, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> @@ -2783,9 +2985,11 @@ class N2VQShAdd op11_8, bit op7, bit op4, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (add QPR:$src1, (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; +} // Shift by immediate and insert, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShIns op11_8, bit op7, bit op4, Operand ImmTy, Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> @@ -2800,19 +3004,20 @@ class N2VQShIns op11_8, bit op7, bit op4, (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; +} // Convert, with fractional bits immediate, // both double- and quad-register. 
class N2VCvtD op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDInt; @@ -2923,7 +3128,7 @@ multiclass N2VN_HSD op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v8i8 : N2VNInt; @@ -3011,7 +3216,7 @@ multiclass N3VInt_HS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { // 64-bit vector types. def v4i16 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { // 64-bit vector types. 
def v4i16 : N3VDIntSh op11_8, bit op4, Format f, multiclass N3VIntSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, @@ -3069,7 +3274,7 @@ multiclass N3VInt_QHS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_HS { def v8i8 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_HSSh { def v8i8 : N3VDIntSh op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_QHS { def v1i64 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_QHSSh { def v1i64 : N3VDIntSh op11_8, bit op4, Format f, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { def v8i8 : N3VNInt; @@ -3189,7 +3394,7 @@ multiclass N3VLExt_QHS op11_8, bit op4, multiclass N3VLInt_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit 
Commutable = 0> { def v4i32 : N3VLInt; @@ -3200,7 +3405,7 @@ multiclass N3VLInt_HS op11_8, bit op4, multiclass N3VLIntSL_HS op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v4i16 : N3VLIntSL16; def v2i32 : N3VLIntSL op11_8, multiclass N3VLInt_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VLInt_HS { def v8i16 : N3VLInt op11_8, bit op4, // ....with explicit extend (VABDL). multiclass N3VLIntExt_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { def v8i16 : N3VLIntExt; @@ -3295,7 +3500,7 @@ multiclass N3VMulOpSL_HS op11_8, // element sizes of 8, 16 and 32 bits: multiclass N3VIntOp_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp, + string OpcodeStr, string Dt, SDPatternOperator IntOp, SDNode OpNode> { // 64-bit vector types. def v8i8 : N3VDIntOp op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. 
def v8i8 : N3VDInt3; @@ -3365,7 +3570,7 @@ multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i32 : N3VLInt3; def v2i64 : N3VLInt3 op11_8, bit op4, } multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VLInt3SL16; def v2i32 : N3VLInt3SL op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> + string OpcodeStr, string Dt, SDPatternOperator IntOp> : N3VLInt3_HS { def v8i16 : N3VLInt3; @@ -3392,7 +3597,7 @@ multiclass N3VLInt3_QHS op11_8, bit op4, // ....with explicit extend (VABAL). multiclass N3VLIntExtOp_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { def v8i16 : N3VLIntExtOp; @@ -3409,7 +3614,7 @@ multiclass N3VLIntExtOp_QHS op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt; @@ -3432,7 +3637,7 @@ multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. 
def v8i8 : N2VDPLInt2; @@ -3493,7 +3698,7 @@ multiclass N2VShL_QHSD op11_8, bit op4, } multiclass N2VShR_QHSD op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh { @@ -3837,10 +4042,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -3895,10 +4100,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -3947,6 +4152,37 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasVFP4,UseFusedMAC]>; + +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasVFP4,UseFusedMAC]>; + +// Fused Vector Multiply Subtract (floating-point) +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasVFP4,UseFusedMAC]>; +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasVFP4,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; + // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) @@ -4106,6 +4342,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, // VBIC : Vector Bitwise Bit Clear (AND NOT) +let TwoOperandAliasConstraint = "$Vn = $Vd" in { def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbic", "$Vd, $Vn, $Vm", "", @@ -4116,6 +4353,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), "vbic", "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>; +} def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), @@ -4447,8 +4685,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4482,8 +4722,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4617,22 +4859,24 
@@ defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", - int_arm_neon_vclz>; + ctlz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, int_arm_neon_vcnt>; + v8i8, v8i8, ctpop>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, int_arm_neon_vcnt>; + v16i8, v16i8, ctpop>; // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; // Vector Move Operations. 
@@ -4801,25 +5045,23 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } + +def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), + GPR:$R, imm:$lane))]>; +def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), + GPR:$R, imm:$lane))]>; } -def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, - (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i8_reg imm:$lane))), - GPR:$src2, (SubReg_i8_lane imm:$lane))), - (DSubReg_i8_reg imm:$lane)))>; -def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, - (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i16_reg imm:$lane))), - GPR:$src2, (SubReg_i16_lane imm:$lane))), - (DSubReg_i16_reg imm:$lane)))>; + def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i32_reg imm:$lane))), - GPR:$src2, (SubReg_i32_lane imm:$lane))), - (DSubReg_i32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + GPR:$src2, + (SSubReg_f32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), @@ -4964,6 +5206,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector 
Conversions. @@ -5100,6 +5345,9 @@ def : AlignedVEXTq; // VEXT : Vector Extract + +// All of these have a two-operand InstAlias. +let TwoOperandAliasConstraint = "$Vn = $Vd" in { class VEXTd : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, @@ -5119,6 +5367,7 @@ class VEXTq bits<4> index; let Inst{11-8} = index{3-0}; } +} def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; @@ -5170,7 +5419,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -5180,7 +5431,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -5198,21 +5451,19 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -5228,24 +5479,21 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", 
[]>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; @@ -5295,9 +5543,13 @@ def : N3VSPat; def : N3VSPat; def : N3VSPat; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; +def : N3VSMulOpPat, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; +def : N3VSMulOpPat, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat; def : N2VSPat; def : N3VSPat; @@ -5374,6 +5626,156 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. 
Example: +// Lengthen_Single<"8", "i16", "8"> = +// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) +// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; +multiclass Lengthen_Single { + let AddedComplexity = 10 in { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLsv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + } +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. Example: +// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = +// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)>; +multiclass Lengthen_HalfSingle { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening 
load, to quadruple the initial length. +// +// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), +// (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_Double { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length, but which ends up only +// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
+// +// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_HalfDouble { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; +} + +defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 + +defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 +defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; +// v2i8 -> v2i16 -> v2i32 +defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; +// v2i16 -> v2i32 -> v2i64 +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; + +// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 +def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 
(EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases @@ -5384,408 +5786,791 @@ def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; - -// VADD two-operand aliases. -def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSUB two-operand aliases. 
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VADDW two-operand aliases. -def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", - (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", - (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", - (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", - (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", - (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", - (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; - // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
-defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... 
two-operand aliases -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VMUL two-operand aliases. 
-def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", - (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", - (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", - (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", - (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", - (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", - (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", - (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", - (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", - (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", - (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", - (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", - (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", - (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", - (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", - (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", - (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -// VQADD (register) two-operand aliases. 
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHL (immediate) two-operand aliases. 
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; - -// VSHL (register) two-operand aliases. -def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, 
pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHL (immediate) two-operand aliases. -def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : 
NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. -defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, 
$Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. -defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_32 : + 
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins 
VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins 
VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, 
pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, 
$addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + 
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, 
$addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + 
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def 
VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + + +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", 
"$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VST4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, 
$addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VMOV takes an optional datatype suffix -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +// VCLE (register) is an assembler alias for VCGE w/ the operands reversed. +// D-register versions. 
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. 
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", @@ -5818,33 +6603,35 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -// Two-operand variants for VEXT -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; - -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", - (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; - -// Two-operand variants for VQDMULH -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// VSWP allows, but does not require, a type suffix. +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; + +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// "vmov Vd, #-imm" can be handled via "vmvn", and "vmvn Vd, #-imm" via "vmov". +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; // 'gas' compatibility aliases for quad-word instructions. Strictly speaking, // these should restrict to just the Q register variants, but the register @@ -5857,6 +6644,10 @@ def : NEONMnemonicAlias<"vorrq", "vorr">; def : NEONMnemonicAlias<"vmovq", "vmov">; def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; def : NEONMnemonicAlias<"vaddq", "vadd">; def : NEONMnemonicAlias<"vsubq", "vsub">; @@ -5875,3 +6666,18 @@ def : NEONMnemonicAlias<"vcvtq", "vcvt">; def : NEONMnemonicAlias<"vcleq", "vcle">; def : NEONMnemonicAlias<"vceqq", "vceq">; + +def : NEONMnemonicAlias<"vzipq", "vzip">; +def : NEONMnemonicAlias<"vswpq", "vswp">; + +def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; +def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; + + +// Aliases for loading floating point immediates that aren't representable +// using the vmov.f32 encoding but whose bit pattern is representable using +// the .i32 encoding. +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;