X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrNEON.td;h=6ae755eb68520035ecc162592050c5e748864c3f;hb=400c95fe3802821815c69077e48c8fd276ec6494;hp=fbe70824d4ab6a073888f8d701726763521c359a;hpb=58393bc3fdcc0d1f58f4da72388e44d5ed62f927;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index fbe70824d4a..6ae755eb685 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -98,68 +98,69 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; // NEON operand definitions //===----------------------------------------------------------------------===// -def h8imm : Operand { - let PrintMethod = "printHex8ImmOperand"; -} -def h16imm : Operand { - let PrintMethod = "printHex16ImmOperand"; -} -def h32imm : Operand { - let PrintMethod = "printHex32ImmOperand"; -} -def h64imm : Operand { - let PrintMethod = "printHex64ImmOperand"; +def nModImm : Operand { + let PrintMethod = "printNEONModImmOperand"; } //===----------------------------------------------------------------------===// // NEON load / store instructions //===----------------------------------------------------------------------===// +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia", "$addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { - let Inst{27-25} = 0b110; - let Inst{24} = 0; // P bit - let Inst{23} = 1; // U bit - let Inst{20} = 1; - let Inst{11-8} = 0b1011; -} - +// This is equivalent to VLDMD except that it has a Q register operand +// instead of a pair of D registers. +def VLDMQ + : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), + IndexModeNone, IIC_fpLoadm, + "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; + +// Use vld1 to load a Q register as a D register pair. +// This alternative to VLDMQ allows an alignment to be specified. +// This is equivalent to VLD1q64 except that it has a Q register operand. +def VLD1q + : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), + IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; +} // mayLoad = 1, neverHasSideEffects = 1 + +let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. -def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia", "$addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), addrmode4:$addr)]> { - let Inst{27-25} = 0b110; - let Inst{24} = 0; // P bit - let Inst{23} = 1; // U bit - let Inst{20} = 0; - let Inst{11-8} = 0b1011; -} +// This is equivalent to VSTMD except that it has a Q register operand +// instead of a pair of D registers. +def VSTMQ + : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), + IndexModeNone, IIC_fpStorem, + "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; + +// Use vst1 to store a Q register as a D register pair. +// This alternative to VSTMQ allows an alignment to be specified. +// This is equivalent to VST1q64 except that it has a Q register operand. +def VST1q + : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), + IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; +} // mayStore = 1, neverHasSideEffects = 1 + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; -class VLD1Q op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; - -def VLD1d8 : VLD1D<0b0000, "8", v8i8>; -def VLD1d16 : VLD1D<0b0100, "16", v4i16>; -def VLD1d32 : VLD1D<0b1000, "32", v2i32>; -def VLD1df : VLD1D<0b1000, "32", v2f32>; -def VLD1d64 : VLD1D<0b1100, "64", v1i64>; - -def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; -def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; -def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; -def VLD1qf : VLD1Q<0b1000, "32", v4f32>; -def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; - -let mayLoad = 1 in { +class VLD1D op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), + (ins addrmode6:$addr), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr", "", []>; +class VLD1Q op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD1, + "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + +def VLD1d8 : VLD1D<0b0000, "8">; +def VLD1d16 : VLD1D<0b0100, "16">; +def VLD1d32 : VLD1D<0b1000, "32">; +def VLD1d64 : VLD1D<0b1100, "64">; + +def VLD1q8 : VLD1Q<0b0000, "8">; +def VLD1q16 : VLD1Q<0b0100, "16">; +def VLD1q32 : VLD1Q<0b1000, "32">; +def VLD1q64 : VLD1Q<0b1100, "64">; // ...with address register writeback: class VLD1DWB op7_4, string Dt> @@ -182,35 +183,32 @@ def VLD1q8_UPD : VLD1QWB<0b0000, "8">; def VLD1q16_UPD : VLD1QWB<0b0100, "16">; def VLD1q32_UPD : VLD1QWB<0b1000, "32">; def VLD1q64_UPD : VLD1QWB<0b1100, "64">; -} // mayLoad = 1 -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { - -// These (dreg triple/quadruple) are for disassembly only. +// ...with 3 registers (some of these are only for the disassembler): class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -class VLD1D4 op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; - -def VLD1d8T : VLD1D3<0b0000, "8">; -def VLD1d16T : VLD1D3<0b0100, "16">; -def VLD1d32T : VLD1D3<0b1000, "32">; -def VLD3d64 : VLD1D3<0b1100, "64">; - -def VLD1d8Q : VLD1D4<0b0000, "8">; -def VLD1d16Q : VLD1D4<0b0100, "16">; -def VLD1d32Q : VLD1D4<0b1000, "32">; -def VLD4d64 : VLD1D4<0b1100, "64">; - -// ...with address register writeback: class VLD1D3WB op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; + +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +def VLD1d64T : VLD1D3<0b1100, "64">; + +def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; + +// ...with 4 registers (some of these are only for the disassembler): +class VLD1D4 op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; class VLD1D4WB op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), @@ -218,15 +216,15 @@ class VLD1D4WB op7_4, string Dt> "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", []>; -def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; -def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; -def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; -def VLD3d64_UPD : VLD1D3WB<0b1100, "64">; +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +def VLD1d64Q : VLD1D4<0b1100, "64">; def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; -def VLD4d64_UPD : VLD1D4WB<0b1100, "64">; +def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D op11_8, bits<4> op7_4, string Dt> @@ -242,9 +240,6 @@ class VLD2Q op7_4, string Dt> def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; -def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; def VLD2q8 : VLD2Q<0b0000, "8">; def VLD2q16 : VLD2Q<0b0100, "16">; @@ -266,11 +261,6 @@ class VLD2QWB op7_4, string Dt> def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; -def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, - (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", - "$addr.addr = $wb", []>; def VLD2q8_UPD : VLD2QWB<0b0000, "8">; def VLD2q16_UPD : VLD2QWB<0b0100, "16">; @@ -474,33 +464,28 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD4DUP : Vector Load (single 4-element structure to all lanes) // FIXME: Not yet implemented. -} // mayLoad = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 + +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string Dt, ValueType Ty> +class VST1D op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - "vst1", Dt, "\\{$src\\}, $addr", "", - [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - "vst1", Dt, "${src:dregpair}, $addr", "", - [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; - -let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "8", v8i8>; -def VST1d16 : VST1D<0b0100, "16", v4i16>; -def VST1d32 : VST1D<0b1000, "32", v2i32>; -def VST1df : VST1D<0b1000, "32", v2f32>; -def VST1d64 : VST1D<0b1100, "64", v1i64>; - -def VST1q8 : VST1Q<0b0000, "8", v16i8>; -def VST1q16 : VST1Q<0b0100, "16", v8i16>; -def VST1q32 : VST1Q<0b1000, "32", v4i32>; -def VST1qf : VST1Q<0b1000, "32", v4f32>; -def VST1q64 : VST1Q<0b1100, "64", v2i64>; -} // hasExtraSrcRegAllocReq - -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + "vst1", Dt, "\\{$src\\}, $addr", "", []>; +class VST1Q op7_4, string Dt> + : NLdSt<0,0b00,0b1010,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST1d8 : VST1D<0b0000, "8">; +def VST1d16 : VST1D<0b0100, "16">; +def VST1d32 : VST1D<0b1000, "32">; +def VST1d64 : VST1D<0b1100, "64">; + +def VST1q8 : VST1Q<0b0000, "8">; +def VST1q16 : VST1Q<0b0100, "16">; +def VST1q32 : VST1Q<0b1000, "32">; +def VST1q64 : VST1Q<0b1100, "64">; // ...with address register writeback: class VST1DWB op7_4, string Dt> @@ -522,34 +507,34 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; -// These (dreg triple/quadruple) are for disassembly only. +// ...with 3 registers (some of these are only for the disassembler): class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -class VST1D4 op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", - []>; - -def VST1d8T : VST1D3<0b0000, "8">; -def VST1d16T : VST1D3<0b0100, "16">; -def VST1d32T : VST1D3<0b1000, "32">; -def VST3d64 : VST1D3<0b1100, "64">; - -def VST1d8Q : VST1D4<0b0000, "8">; -def VST1d16Q : VST1D4<0b0100, "16">; -def VST1d32Q : VST1D4<0b1000, "32">; -def VST4d64 : VST1D4<0b1100, "64">; - -// ...with address register writeback: class VST1D3WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; + +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +def VST1d64T : VST1D3<0b1100, "64">; + +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +def VST1d64T_UPD : VST1D3WB<0b1100, "64">; + +// ...with 4 registers (some of these are only for the disassembler): +class VST1D4 op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + []>; class VST1D4WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, @@ -557,15 +542,15 @@ class VST1D4WB op7_4, string Dt> IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST1d8T_UPD : VST1D3WB<0b0000, "8">; -def VST1d16T_UPD : VST1D3WB<0b0100, "16">; -def VST1d32T_UPD : VST1D3WB<0b1000, "32">; -def VST3d64_UPD : VST1D3WB<0b1100, "64">; +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +def VST1d64Q : VST1D4<0b1100, "64">; def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; -def VST4d64_UPD : VST1D4WB<0b1100, "64">; +def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; // VST2 : Vector Store (multiple 2-element structures) class VST2D op11_8, bits<4> op7_4, string Dt> @@ -581,9 +566,6 @@ class VST2Q op7_4, string Dt> def VST2d8 : VST2D<0b1000, 0b0000, "8">; def VST2d16 : VST2D<0b1000, 0b0100, "16">; def VST2d32 : VST2D<0b1000, 0b1000, "32">; -def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; @@ -605,11 +587,6 @@ class VST2QWB op7_4, string Dt> def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; -def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", - "$addr.addr = $wb", []>; def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, "16">; @@ -802,7 +779,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; -} // mayStore = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// @@ -810,27 +787,27 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; //===----------------------------------------------------------------------===// // Extract D sub-registers of Q registers. -// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6) def DSubReg_i8_reg : SDNodeXFormgetTargetConstant(5 + N->getZExtValue() / 8, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); }]>; def DSubReg_i16_reg : SDNodeXFormgetTargetConstant(5 + N->getZExtValue() / 4, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); }]>; def DSubReg_i32_reg : SDNodeXFormgetTargetConstant(5 + N->getZExtValue() / 2, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); }]>; def DSubReg_f64_reg : SDNodeXFormgetTargetConstant(5 + N->getZExtValue(), MVT::i32); -}]>; -def DSubReg_f64_other_reg : SDNodeXFormgetTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; // Extract S sub-registers of Q/D registers. -// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.) def SSubReg_f32_reg : SDNodeXFormgetTargetConstant(1 + N->getZExtValue(), MVT::i32); + assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); }]>; // Translate lane numbers from Q registers to D subregs. @@ -859,18 +836,18 @@ class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, + bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, + IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { let isCommutable = Commutable; } @@ -928,7 +905,7 @@ class N3VD op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { let isCommutable = Commutable; @@ -939,27 +916,28 @@ class N3VDX op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX{ let isCommutable = Commutable; } + class N3VDSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{ + (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> { let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { @@ -970,7 +948,7 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { let isCommutable = Commutable; @@ -979,7 +957,7 @@ class N3VQX op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX{ let isCommutable = Commutable; @@ -989,7 +967,7 @@ class N3VQSL op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -1000,7 +978,7 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -1010,10 +988,10 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -1022,7 +1000,7 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -1033,19 +1011,18 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { let isCommutable = 0; } class N3VQInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -1055,7 +1032,7 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -1067,7 +1044,7 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -1081,14 +1058,14 @@ class N3VSMulOp op21_20, bits<4> op11_8, bit op4, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VDMulOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; @@ -1097,7 +1074,8 @@ class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, + (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -1109,7 +1087,8 @@ class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, + (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -1121,7 +1100,7 @@ class N3VQMulOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; @@ -1130,7 +1109,8 @@ class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -1143,7 +1123,8 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -1157,7 +1138,7 @@ class N3VDInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; @@ -1165,7 +1146,7 @@ class N3VQInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; @@ -1176,7 +1157,7 @@ class N3VLInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V; @@ -1185,7 +1166,8 @@ class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -1221,7 +1204,7 @@ class N3VLInt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -1230,8 +1213,8 @@ class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -1297,17 +1280,17 @@ class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. class N2VDSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm; class N2VQSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm; @@ -1316,8 +1299,8 @@ class N2VLSh op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; @@ -1326,7 +1309,7 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; @@ -1336,14 +1319,14 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, class N2VDShAdd op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShAdd op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; @@ -1351,15 +1334,15 @@ class N2VQShAdd op11_8, bit op7, bit op4, // Shift by immediate and insert, // both double- and quad-register. class N2VDShIns op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> : N2VImm; class N2VQShIns op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> : N2VImm; @@ -1369,15 +1352,15 @@ class N2VCvtD op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; //===----------------------------------------------------------------------===// @@ -1521,24 +1504,24 @@ multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, // Neon 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: -multiclass N3VInt_HS op11_8, bit op4, +multiclass N3VInt_HS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. - def v4i16 : N3VDInt; - def v2i32 : N3VDInt; // 128-bit vector types. - def v8i16 : N3VQInt; - def v4i32 : N3VQInt; } @@ -1558,38 +1541,37 @@ multiclass N3VIntSL_HS op11_8, } // ....then also with element size of 8 bits: -multiclass N3VInt_QHS op11_8, bit op4, +multiclass N3VInt_QHS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_HS { - def v8i8 : N3VDInt; - def v16i8 : N3VQInt; } // ....then also with element size of 64 bits: -multiclass N3VInt_QHSD op11_8, bit op4, +multiclass N3VInt_QHSD op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_QHS { - def v1i64 : N3VDInt; - def v2i64 : N3VQInt; } - // Neon Narrowing 3-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, @@ -1611,12 +1593,13 @@ multiclass N3VNInt_HSD op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt; - def v2i64 : N3VLInt; } @@ -1632,11 +1615,12 @@ multiclass N3VLIntSL_HS op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt_QHS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS { - def v8i16 : N3VLInt; } @@ -1701,21 +1685,22 @@ multiclass N3VMulOpSL_HS op11_8, // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3; - def v4i16 : N3VDInt3; - def v2i32 : N3VDInt3; // 128-bit vector types. - def v16i8 : N3VQInt3; - def v8i16 : N3VQInt3; - def v4i32 : N3VQInt3; } @@ -1724,10 +1709,11 @@ multiclass N3VInt3_QHS op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i32 : N3VLInt3; - def v2i64 : N3VLInt3; } @@ -1741,9 +1727,10 @@ multiclass N3VLInt3SL_HS op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> - : N3VLInt3_HS { - def v8i16 : N3VLInt3 { + def v8i16 : N3VLInt3; } @@ -1819,46 +1806,46 @@ multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, +// with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode, Format f> { // 64-bit vector types. - def v8i8 : N2VDSh { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDSh { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDSh { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDSh; // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQSh { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQSh { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQSh; // imm6 = xxxxxx } - // Neon Shift-Accumulate vector operations, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShAdd_QHSD op11_8, bit op4, @@ -1900,41 +1887,43 @@ multiclass N2VShAdd_QHSD op11_8, bit op4, // Neon Shift-Insert vector operations, +// with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShIns_QHSD op11_8, bit op4, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, SDNode ShOp, + Format f> { // 64-bit vector types. def v8i8 : N2VDShIns { + f, OpcodeStr, "8", v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShIns { + f, OpcodeStr, "16", v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShIns { + f, OpcodeStr, "32", v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShIns; + f, OpcodeStr, "64", v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQShIns { + f, OpcodeStr, "8", v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShIns { + f, OpcodeStr, "16", v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShIns { + f, OpcodeStr, "32", v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShIns; + f, OpcodeStr, "64", v2i64, ShOp>; // imm6 = xxxxxx } @@ -1989,28 +1978,34 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", - int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", - int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>; +defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add -defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>; +defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; +defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", int_arm_neon_vaddhn, 1>; @@ -2023,10 +2018,10 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", // VMUL : Vector Multiply (integer, polynomial and floating-point) defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", - v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", - v16i8, v16i8, int_arm_neon_vmulp, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", + "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", + "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", @@ -2056,7 +2051,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, @@ -2078,8 +2073,8 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, + IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -2100,10 +2095,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu, 1>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", int_arm_neon_vmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", @@ -2112,10 +2107,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. @@ -2159,15 +2154,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", int_arm_neon_vmlalu>; defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", - int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) @@ -2209,15 +2206,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", int_arm_neon_vmlslu>; defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", - int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. @@ -2230,26 +2229,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", - int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", - int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2261,8 +2260,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, @@ -2272,12 +2271,12 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", - v2i32, v2f32, NEONvcge, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; // For disassembly only. @@ -2288,10 +2287,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$dst, $src, #0">; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, @@ -2304,21 +2303,27 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$dst, $src, #0">; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", - v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32", - v4i32, v4f32, int_arm_neon_vacgeq, 0>; +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", + "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", + "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32", - v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32", - v4i32, v4f32, int_arm_neon_vacgtq, 0>; +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", + "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", + "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; // Vector Bitwise Operations. +def vnot8 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>; +def vnot16 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; + + // VAND : Vector Bitwise AND def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>; @@ -2339,74 +2344,80 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", // VBIC : Vector Bitwise Bit Clear (AND NOT) def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vbic", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (and DPR:$src1, - (vnot_conv DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, + "vbic", "$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (and DPR:$src1, + (vnot8 DPR:$src2))))]>; def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vbic", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (and QPR:$src1, - (vnot_conv QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, + "vbic", "$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (and QPR:$src1, + (vnot16 QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vorn", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (or DPR:$src1, - (vnot_conv DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, + "vorn", "$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (or DPR:$src1, + (vnot8 DPR:$src2))))]>; def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vorn", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (or QPR:$src1, - (vnot_conv QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, + "vorn", "$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (or QPR:$src1, + (vnot16 QPR:$src2))))]>; // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, - "vmvn", "$dst, $src", "", - [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; + (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, + "vmvn", "$dst, $src", "", + [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, - "vmvn", "$dst, $src", "", - [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; -def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; + (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, + "vmvn", "$dst, $src", "", + [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; +def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set DPR:$dst, - (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; + (ins DPR:$src1, DPR:$src2, DPR:$src3), + N3RegFrm, IIC_VCNTiD, + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, + (v2i32 (or (and DPR:$src2, DPR:$src1), + (and DPR:$src3, (vnot8 DPR:$src1)))))]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set QPR:$dst, - (v4i32 (or (and QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnot_conv QPR:$src1)))))]>; + (ins QPR:$src1, QPR:$src2, QPR:$src3), + N3RegFrm, IIC_VCNTiQ, + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, + (v4i32 (or (and QPR:$src2, QPR:$src1), + (and QPR:$src3, (vnot16 QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiD, + "vbif", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), - IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiQ, + "vbif", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiD, + "vbit", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), - IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiQ, + "vbit", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking @@ -2416,63 +2427,79 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // Vector Absolute Differences. // VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32", +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8", +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16", +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32", +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32", +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long @@ -2488,36 +2515,36 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8", - v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16", - v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32", - v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8", - v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16", - v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32", - v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32", - v2f32, v2f32, int_arm_neon_vpmaxs, 0>; +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8", - v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16", - v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32", - v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8", - v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16", - v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32", - v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32", - v2f32, v2f32, int_arm_neon_vpmins, 0>; +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", + "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. @@ -2536,10 +2563,10 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>; @@ -2558,25 +2585,30 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrsqrts", "f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrsqrts", "f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. // VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>; +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, + N2RegVShLFrm>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs, + N2RegVShRFrm>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru, + N2RegVShRFrm>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -2602,28 +2634,37 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>; -defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>; +defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, + N2RegVShRFrm>; +defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru, + N2RegVShRFrm>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>; -defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>; +defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, + N2RegVShLFrm>; +defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu, + N2RegVShLFrm>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu, + N2RegVShLFrm>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -2636,12 +2677,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl", "s", - int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl", "u", - int_arm_neon_vqrshiftu, 0>; +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "s", int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", @@ -2661,9 +2702,9 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>; +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>; +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; // Vector Absolute and Saturating Absolute. @@ -2685,19 +2726,22 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. -def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; -def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; +def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; +def vneg8 : PatFrag<(ops node:$in), + (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>; +def vneg16 : PatFrag<(ops node:$in), + (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>; class VNEGD size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; + [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>; class VNEGQ size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; + [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>; -// VNEG : Vector Negate +// VNEG : Vector Negate (integer) def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; @@ -2715,12 +2759,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; -def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; -def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>; -def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>; -def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>; -def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>; -def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; +def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>; +def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, @@ -2757,83 +2801,95 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // VMOV : Vector Move (Register) +let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; + +// Pseudo vector move instructions for QQ and QQQQ registers. This should +// be expanded after register allocation is completed. +def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; +} // neverHasSideEffects // VMOV : Vector Move (Immediate) // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. def VMOV_get_imm8 : SDNodeXForm; def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 1, *CurDAG).getNode() != 0; }], VMOV_get_imm8>; // VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. def VMOV_get_imm16 : SDNodeXForm; def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 2, *CurDAG).getNode() != 0; }], VMOV_get_imm16>; // VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. def VMOV_get_imm32 : SDNodeXForm; def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 4, *CurDAG).getNode() != 0; }], VMOV_get_imm32>; // VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. def VMOV_get_imm64 : SDNodeXForm; def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 8, *CurDAG).getNode() != 0; }], VMOV_get_imm64>; // Note: Some of the cmode bits in the following VMOV instructions need to // be encoded based on the immed values. +let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; +} // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -2946,11 +3002,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; def : Pat<(v2f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v8i8 (scalar_to_vector GPR:$src)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; @@ -2962,15 +3018,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)), def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v8i16 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v4i32 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; // VDUP : Vector Duplicate (from ARM core register to all elements) @@ -3001,30 +3057,29 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND op19_18, bits<2> op17_16, - string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, - (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src[$lane]", "", - [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; +class VDUPLND op19_16, string OpcodeStr, string Dt, + ValueType Ty> + : NVDupLane; -class VDUPLNQ op19_18, bits<2> op17_16, string OpcodeStr, string Dt, +class VDUPLNQ op19_16, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, - (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src[$lane]", "", - [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; + : NVDupLane; // Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>; -def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>; -def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>; -def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>; -def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>; +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>; +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>; +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>; +def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>; +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>; +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>; +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>; +def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -3053,17 +3108,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; -def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; -def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; - // VMOVN : Vector Narrowing Move defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn", "i", int_arm_neon_vmovn>; @@ -3186,15 +3230,15 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; class VEXTd : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm, + IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>; class VEXTq : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm, + IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>; @@ -3243,25 +3287,26 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // VTBL : Vector Table Lookup def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$src), IIC_VTB1, + (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1, "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), + NVTBLFrm, IIC_VTB4, "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; @@ -3270,26 +3315,27 @@ def VTBL4 // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, + (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1, "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), + NVTBLFrm, IIC_VTBX3, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, @@ -3303,27 +3349,27 @@ def VTBX4 class N2VSPat : NEONFPPat<(ResTy (OpNode SPR:$a)), (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$a, ssub_0))), + ssub_0)>; class N3VSPat : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), (EXTRACT_SUBREG (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$b, ssub_0))), + ssub_0)>; class N3VSMulOpPat : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), + SPR:$acc, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + SPR:$b, ssub_0)), + ssub_0)>; // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. @@ -3349,12 +3395,12 @@ def : N3VSPat; //let neverHasSideEffects = 1 in //def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", -// v2f32, fmul, fadd>; +// v2f32, fmul, fadd>; //def : N3VSMulOpPat; //let neverHasSideEffects = 1 in //def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", -// v2f32, fmul, fsub>; +// v2f32, fmul, fsub>; //def : N3VSMulOpPat; // Vector Absolute used for single-precision FP @@ -3374,14 +3420,14 @@ def : N2VSPat; // Vector Maximum used for single-precision FP let neverHasSideEffects = 1 in def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, "vmax", "f32", "$dst, $src1, $src2", "", []>; def : N3VSPat; // Vector Minimum used for single-precision FP let neverHasSideEffects = 1 in def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, "vmin", "f32", "$dst, $src1, $src2", "", []>; def : N3VSPat;