From: Evan Cheng Date: Sat, 9 Oct 2010 04:07:58 +0000 (+0000) Subject: Add VLD4 scheduling itineraries. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=10dc63feeb7847f867a6f35179312f4079981ad3;p=oota-llvm.git Add VLD4 scheduling itineraries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116143 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4340a7d3c74..d2ff222e29f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -548,7 +548,7 @@ class VLD4LN op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, "vld4", Dt, + nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; @@ -556,16 +556,16 @@ def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">; def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">; def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">; -def VLD4LNd8Pseudo : VLDQQLNPseudo; -def VLD4LNd16Pseudo : VLDQQLNPseudo; -def VLD4LNd32Pseudo : VLDQQLNPseudo; +def VLD4LNd8Pseudo : VLDQQLNPseudo; +def VLD4LNd16Pseudo : VLDQQLNPseudo; +def VLD4LNd32Pseudo : VLDQQLNPseudo; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">; def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">; -def VLD4LNq16Pseudo : VLDQQQQLNPseudo; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo; +def VLD4LNq16Pseudo : VLDQQQQLNPseudo; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo; // ...with address register writeback: class VLD4LNWB op11_8, bits<4> op7_4, string Dt> @@ -573,7 +573,7 @@ class VLD4LNWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), - IIC_VLD4, "vld4", Dt, + IIC_VLD4ln, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", []>; @@ -582,15 +582,15 @@ def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">; def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">; def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">; -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">; def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 73c677e0d4a..7ccac7ee861 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -145,6 +145,9 @@ def IIC_VLD3ln : InstrItinClass; def IIC_VLD3u : InstrItinClass; def IIC_VLD3lnu : InstrItinClass; def IIC_VLD4 : InstrItinClass; +def IIC_VLD4ln : InstrItinClass; +def IIC_VLD4u : InstrItinClass; +def IIC_VLD4lnu : InstrItinClass; def IIC_VST : InstrItinClass; def IIC_VUNAD : InstrItinClass; def IIC_VUNAQ : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 6c4cf8f122d..fc6ad340051 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -390,7 +390,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 InstrItinData, - InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NLSPipe], 1>, InstrStage<1, [A8_LSPipe]>]>, // VLD1x2 InstrItinData, @@ -496,8 +496,27 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD4 InstrItinData, - InstrStage<1, [A8_NLSPipe]>, - InstrStage<1, [A8_LSPipe]>], [2, 2, 2, 2, 1]>, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 1]>, + // + // VLD4ln + InstrItinData, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, + // + // VLD4u + InstrItinData, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 2, 1]>, + // + // VLD4lnu + InstrItinData, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VST // FIXME: We don't model this instruction properly diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 2d2bc370f52..06edaa9446d 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -854,14 +854,36 @@ def CortexA9Itineraries : ProcessorItineraries< [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, // // VLD4 - // FIXME: We don't model this instruction properly InstrItinData, - // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>], - [2, 2, 2, 2, 1]>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 5, 1]>, + // + // VLD4ln + InstrItinData, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>, + // + // VLD4u + InstrItinData, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 5, 2, 1]>, + // + // VLD4lnu + InstrItinData, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VST // FIXME: We don't model this instruction properly