From 60ff87914fcafd82fb123f03b17827ab7b2c3ab3 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 11 Oct 2010 22:03:18 +0000 Subject: [PATCH] Proper VST scheduling itineraries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 206 ++++++++++++++++---------------- lib/Target/ARM/ARMSchedule.td | 23 +++- lib/Target/ARM/ARMScheduleA8.td | 153 ++++++++++++++++++++++-- lib/Target/ARM/ARMScheduleA9.td | 138 +++++++++++++++++++-- 4 files changed, 396 insertions(+), 124 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d2ff222e29f..0dc3d788ccc 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -617,16 +617,16 @@ class VSTQQWBPseudo "$addr.addr = $wb">; class VSTQQQQWBPseudo : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb">; // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST1, "vst1", Dt, "\\{$src\\}, $addr", "", []>; class VST1Q op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST1x2, "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>; def VST1d8 : VST1D<0b0000, "8">; @@ -639,20 +639,20 @@ def VST1q16 : VST1Q<0b0100, "16">; def VST1q32 : VST1Q<0b1000, "32">; def VST1q64 : VST1Q<0b1100, "64">; -def VST1q8Pseudo : VSTQPseudo; -def VST1q16Pseudo : VSTQPseudo; -def VST1q32Pseudo : VSTQPseudo; -def VST1q64Pseudo : VSTQPseudo; +def VST1q8Pseudo : VSTQPseudo; +def VST1q16Pseudo : VSTQPseudo; +def VST1q32Pseudo : VSTQPseudo; +def VST1q64Pseudo : VSTQPseudo; // ...with address register writeback: class VST1DWB op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST1u, "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; class VST1QWB op7_4, string Dt> : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), - IIC_VST, "vst1", Dt, "\\{$src1, $src2\\}, $addr$offset", + IIC_VST1x2u, "vst1", Dt, "\\{$src1, $src2\\}, $addr$offset", "$addr.addr = $wb", []>; def VST1d8_UPD : VST1DWB<0b0000, "8">; @@ -665,21 +665,21 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; -def VST1q8Pseudo_UPD : VSTQWBPseudo; -def VST1q16Pseudo_UPD : VSTQWBPseudo; -def VST1q32Pseudo_UPD : VSTQWBPseudo; -def VST1q64Pseudo_UPD : VSTQWBPseudo; +def VST1q8Pseudo_UPD : VSTQWBPseudo; +def VST1q16Pseudo_UPD : VSTQWBPseudo; +def VST1q32Pseudo_UPD : VSTQWBPseudo; +def VST1q64Pseudo_UPD : VSTQWBPseudo; // ...with 3 registers (some of these are only for the disassembler): class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; + IIC_VST1x3, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; class VST1D3WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + IIC_VST1x3u, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; def VST1d8T : VST1D3<0b0000, "8">; @@ -692,20 +692,20 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">; def VST1d32T_UPD : VST1D3WB<0b1000, "32">; def VST1d64T_UPD : VST1D3WB<0b1100, "64">; -def VST1d64TPseudo : VSTQQPseudo; -def VST1d64TPseudo_UPD : VSTQQWBPseudo; +def VST1d64TPseudo : VSTQQPseudo; +def VST1d64TPseudo_UPD : VSTQQWBPseudo; // ...with 4 registers (some of these are only for the disassembler): class VST1D4 op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + IIC_VST1x4, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; class VST1D4WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, + "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; def VST1d8Q : VST1D4<0b0000, "8">; @@ -718,18 +718,18 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; -def VST1d64QPseudo : VSTQQPseudo; -def VST1d64QPseudo_UPD : VSTQQWBPseudo; +def VST1d64QPseudo : VSTQQPseudo; +def VST1d64QPseudo_UPD : VSTQQWBPseudo; // VST2 : Vector Store (multiple 2-element structures) class VST2D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), - IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; + IIC_VST2, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; class VST2Q op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST2x2, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; def VST2d8 : VST2D<0b1000, 0b0000, "8">; @@ -740,25 +740,25 @@ def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; def VST2q32 : VST2Q<0b1000, "32">; -def VST2d8Pseudo : VSTQPseudo; -def VST2d16Pseudo : VSTQPseudo; -def VST2d32Pseudo : VSTQPseudo; +def VST2d8Pseudo : VSTQPseudo; +def VST2d16Pseudo : VSTQPseudo; +def VST2d32Pseudo : VSTQPseudo; -def VST2q8Pseudo : VSTQQPseudo; -def VST2q16Pseudo : VSTQQPseudo; -def VST2q32Pseudo : VSTQQPseudo; +def VST2q8Pseudo : VSTQQPseudo; +def VST2q16Pseudo : VSTQQPseudo; +def VST2q32Pseudo : VSTQQPseudo; // ...with address register writeback: class VST2DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), - IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", + IIC_VST2u, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", "$addr.addr = $wb", []>; class VST2QWB op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, + "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; @@ -769,13 +769,13 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, "16">; def VST2q32_UPD : VST2QWB<0b1000, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo; -def VST2d16Pseudo_UPD : VSTQWBPseudo; -def VST2d32Pseudo_UPD : VSTQWBPseudo; +def VST2d8Pseudo_UPD : VSTQWBPseudo; +def VST2d16Pseudo_UPD : VSTQWBPseudo; +def VST2d32Pseudo_UPD : VSTQWBPseudo; -def VST2q8Pseudo_UPD : VSTQQWBPseudo; -def VST2q16Pseudo_UPD : VSTQQWBPseudo; -def VST2q32Pseudo_UPD : VSTQQWBPseudo; +def VST2q8Pseudo_UPD : VSTQQWBPseudo; +def VST2q16Pseudo_UPD : VSTQQWBPseudo; +def VST2q32Pseudo_UPD : VSTQQWBPseudo; // ...with double-spaced registers (for disassembly only): def VST2b8 : VST2D<0b1001, 0b0000, "8">; @@ -788,22 +788,22 @@ def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; // VST3 : Vector Store (multiple 3-element structures) class VST3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3, "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; -def VST3d8Pseudo : VSTQQPseudo; -def VST3d16Pseudo : VSTQQPseudo; -def VST3d32Pseudo : VSTQQPseudo; +def VST3d8Pseudo : VSTQQPseudo; +def VST3d16Pseudo : VSTQQPseudo; +def VST3d32Pseudo : VSTQQPseudo; // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3u, "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -811,9 +811,9 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; -def VST3d8Pseudo_UPD : VSTQQWBPseudo; -def VST3d16Pseudo_UPD : VSTQQWBPseudo; -def VST3d32Pseudo_UPD : VSTQQWBPseudo; +def VST3d8Pseudo_UPD : VSTQQWBPseudo; +def VST3d16Pseudo_UPD : VSTQQWBPseudo; +def VST3d32Pseudo_UPD : VSTQQWBPseudo; // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; @@ -823,35 +823,35 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST4, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; def VST4d8 : VST4D<0b0000, 0b0000, "8">; def VST4d16 : VST4D<0b0000, 0b0100, "16">; def VST4d32 : VST4D<0b0000, 0b1000, "32">; -def VST4d8Pseudo : VSTQQPseudo; -def VST4d16Pseudo : VSTQQPseudo; -def VST4d32Pseudo : VSTQQPseudo; +def VST4d8Pseudo : VSTQQPseudo; +def VST4d16Pseudo : VSTQQPseudo; +def VST4d32Pseudo : VSTQQPseudo; // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -859,9 +859,9 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; -def VST4d8Pseudo_UPD : VSTQQWBPseudo; -def VST4d16Pseudo_UPD : VSTQQWBPseudo; -def VST4d32Pseudo_UPD : VSTQQWBPseudo; +def VST4d8Pseudo_UPD : VSTQQWBPseudo; +def VST4d16Pseudo_UPD : VSTQQWBPseudo; +def VST4d32Pseudo_UPD : VSTQQWBPseudo; // ...with double-spaced registers (non-updating versions for disassembly only): def VST4q8 : VST4D<0b0001, 0b0000, "8">; @@ -871,14 +871,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; // Classes for VST*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -911,29 +911,29 @@ class VSTQQQQLNWBPseudo class VST2LN op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + IIC_VST2ln, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">; def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">; def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">; -def VST2LNd8Pseudo : VSTQLNPseudo; -def VST2LNd16Pseudo : VSTQLNPseudo; -def VST2LNd32Pseudo : VSTQLNPseudo; +def VST2LNd8Pseudo : VSTQLNPseudo; +def VST2LNd16Pseudo : VSTQLNPseudo; +def VST2LNd32Pseudo : VSTQLNPseudo; // ...with double-spaced registers: def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">; def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">; -def VST2LNq16Pseudo : VSTQQLNPseudo; -def VST2LNq32Pseudo : VSTQQLNPseudo; +def VST2LNq16Pseudo : VSTQQLNPseudo; +def VST2LNq32Pseudo : VSTQQLNPseudo; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -941,44 +941,44 @@ def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">; def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">; def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">; -def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">; def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">; -def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; -def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; +def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; +def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, "vst3", Dt, + nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">; def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">; def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">; -def VST3LNd8Pseudo : VSTQQLNPseudo; -def VST3LNd16Pseudo : VSTQQLNPseudo; -def VST3LNd32Pseudo : VSTQQLNPseudo; +def VST3LNd8Pseudo : VSTQQLNPseudo; +def VST3LNd16Pseudo : VSTQQLNPseudo; +def VST3LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">; def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">; -def VST3LNq16Pseudo : VSTQQQQLNPseudo; -def VST3LNq32Pseudo : VSTQQQQLNPseudo; +def VST3LNq16Pseudo : VSTQQQQLNPseudo; +def VST3LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST3LNWB op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), - IIC_VST, "vst3", Dt, + IIC_VST3lnu, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -986,21 +986,21 @@ def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">; def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">; def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">; -def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">; def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">; -def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, "vst4", Dt, + nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; @@ -1008,23 +1008,23 @@ def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">; def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">; def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">; -def VST4LNd8Pseudo : VSTQQLNPseudo; -def VST4LNd16Pseudo : VSTQQLNPseudo; -def VST4LNd32Pseudo : VSTQQLNPseudo; +def VST4LNd8Pseudo : VSTQQLNPseudo; +def VST4LNd16Pseudo : VSTQQLNPseudo; +def VST4LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">; def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">; -def VST4LNq16Pseudo : VSTQQQQLNPseudo; -def VST4LNq32Pseudo : VSTQQQQLNPseudo; +def VST4LNq16Pseudo : VSTQQQQLNPseudo; +def VST4LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), - IIC_VST, "vst4", Dt, + IIC_VST4lnu, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -1032,15 +1032,15 @@ def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">; def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">; def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; -def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; -def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 7ccac7ee861..391367ccf26 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -148,7 +148,28 @@ def IIC_VLD4 : InstrItinClass; def IIC_VLD4ln : InstrItinClass; def IIC_VLD4u : InstrItinClass; def IIC_VLD4lnu : InstrItinClass; -def IIC_VST : InstrItinClass; +def IIC_VST1 : InstrItinClass; +def IIC_VST1x2 : InstrItinClass; +def IIC_VST1x3 : InstrItinClass; +def IIC_VST1x4 : InstrItinClass; +def IIC_VST1u : InstrItinClass; +def IIC_VST1x2u : InstrItinClass; +def IIC_VST1x3u : InstrItinClass; +def IIC_VST1x4u : InstrItinClass; +def IIC_VST2 : InstrItinClass; +def IIC_VST2x2 : InstrItinClass; +def IIC_VST2u : InstrItinClass; +def IIC_VST2x2u : InstrItinClass; +def IIC_VST2ln : InstrItinClass; +def IIC_VST2lnu : InstrItinClass; +def IIC_VST3 : InstrItinClass; +def IIC_VST3u : InstrItinClass; +def IIC_VST3ln : InstrItinClass; +def IIC_VST3lnu : InstrItinClass; +def IIC_VST4 : InstrItinClass; +def IIC_VST4u : InstrItinClass; +def IIC_VST4ln : InstrItinClass; +def IIC_VST4lnu : InstrItinClass; def IIC_VUNAD : InstrItinClass; def IIC_VUNAQ : InstrItinClass; def IIC_VBIND : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index fc6ad340051..e318950b0f4 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -390,8 +390,9 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 InstrItinData, - InstrStage<1, [A8_NLSPipe], 1>, - InstrStage<1, [A8_LSPipe]>]>, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1]>, // VLD1x2 InstrItinData, InstrStage<2, [A8_NLSPipe], 1>, @@ -412,8 +413,8 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1u InstrItinData, - InstrStage<1, [A8_NLSPipe], 1>, - InstrStage<1, [A8_LSPipe]>], + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], [2, 2, 1]>, // // VLD1x2u @@ -436,8 +437,8 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD2 InstrItinData, - InstrStage<1, [A8_NLSPipe], 1>, - InstrStage<1, [A8_LSPipe]>], + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], [2, 2, 1]>, // // VLD2x2 @@ -454,8 +455,8 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD2u InstrItinData, - InstrStage<1, [A8_NLSPipe], 1>, - InstrStage<1, [A8_LSPipe]>], + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], [2, 2, 2, 1, 1, 1]>, // // VLD2x2u @@ -518,11 +519,137 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<5, [A8_LSPipe]>], [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // - // VST - // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [A8_NLSPipe]>, - InstrStage<1, [A8_LSPipe]>]>, + // VST1 + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1]>, + // + // VST1x2 + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST1x3 + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST1x4 + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST1u + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1]>, + // + // VST1x2u + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST1x3u + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST1x4u + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST2 + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST2x2 + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST2u + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST2x2u + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST2ln + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST2lnu + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST3 + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3u + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST3ln + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3lnu + InstrItinData, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST4 + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4u + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST4ln + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4lnu + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, // // Double-register FP Unary InstrItinData, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 06edaa9446d..8f41615a13c 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -885,14 +885,138 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<5, [A9_NPipe]>], [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, // - // VST - // FIXME: We don't model this instruction properly - InstrItinData, - // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + // VST1 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [1, 1, 1]>, + // + // VST1x2 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [1, 1, 1, 1]>, + // + // VST1x3 + InstrItinData, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>]>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST1x4 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST1u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 1, 1, 1, 1]>, + // + // VST1x2u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST1x3u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST1x4u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST2 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [1, 1, 1, 1]>, + // + // VST2x2 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST2u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST2x2u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST2ln + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [1, 1, 1, 1]>, + // + // VST2lnu + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST3 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST3ln + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3lnu + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST4 + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4u + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST4ln + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4lnu + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // // Double-register Integer Unary InstrItinData, -- 2.34.1