From: Hal Finkel
Date: Fri, 29 Nov 2013 07:04:59 +0000 (+0000)
Subject: Adjust PPC A2 input operand latencies
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=eb5a560e0026ddaa33b26675d1d70148a3c0ad82;p=oota-llvm.git

Adjust PPC A2 input operand latencies

On the PPC A2, instructions are only issued after their input operands are ready. Model this by specifying that input operands are read at dispatch (0 cycles after issue). This changes all input operand latencies from 1 to 0.

Significant test-suite performance changes (these are 99.5% confidence intervals on 6 runs for both before and after):

speedups:
MultiSource/Benchmarks/sim/sim -1.21915% +/- 0.175063%
MultiSource/Benchmarks/TSVC/LinearDependence-flt/LinearDependence-flt -1.23946% +/- 1.05133%
SingleSource/Benchmarks/Misc/flops-2 -1.24237% +/- 0.681362%
MultiSource/Applications/JM/lencod/lencod -1.33992% +/- 0.757498%
MultiSource/Benchmarks/TSVC/InductionVariable-flt/InductionVariable-flt -1.51802% +/- 1.21468%
MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/GlobalDataFlow-flt -2.18818% +/- 1.28605%
MultiSource/Benchmarks/TSVC/Packing-flt/Packing-flt -2.21977% +/- 1.19499%
SingleSource/Benchmarks/BenchmarkGame/spectral-norm -2.29822% +/- 0.671871%
MultiSource/Benchmarks/TSVC/Packing-dbl/Packing-dbl -2.40975% +/- 0.355931%
SingleSource/Benchmarks/Misc/fp-convert -2.41899% +/- 1.04751%
MultiSource/Benchmarks/TSVC/Searching-dbl/Searching-dbl -2.50349% +/- 0.126765%
SingleSource/Benchmarks/Misc/flops-3 -3.00214% +/- 0.700795%
MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/LoopRestructuring-flt -3.56995% +/- 3.2929%
MultiSource/Applications/sgefa/sgefa -4.24908% +/- 2.00413%
MultiSource/Benchmarks/ASC_Sequoia/IRSmk/IRSmk -18.1294% +/- 3.96489%

regressions:
MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl 1.03249% +/- 0.178547%
MultiSource/Applications/hexxagon/hexxagon 1.16597% +/- 0.285235%
MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/IndirectAddressing-flt 1.39576% +/- 1.07855%
SingleSource/Benchmarks/Misc-C++/stepanov_v1p2 1.71539% +/- 0.173182%
MultiSource/Benchmarks/Fhourstones-3.1/fhourstones3.1 1.90013% +/- 0.866472%
MultiSource/Benchmarks/TSVC/Recurrences-dbl/Recurrences-dbl 2.39854% +/- 1.05914%
MultiSource/Benchmarks/TSVC/ControlFlow-dbl/ControlFlow-dbl 2.4402% +/- 0.817904%
MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/LoopRestructuring-dbl 5.87997% +/- 3.3172%
MultiSource/Benchmarks/Trimaran/netbench-crc/netbench-crc 9.02643% +/- 5.79591%
MultiSource/Benchmarks/VersaBench/bmm/bmm 10.3517% +/- 1.227%

Obviously, there are data points on both sides of this, but I think, overall, this supports making the change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195951 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 509de816aa1..bd95924be8a 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -26,117 +26,117 @@ def A2_FU : FuncUnit; // FI pipeline def PPCA2Itineraries : ProcessorItineraries< [A2_XU, A2_FU], [], [ InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [39, 1, 1]>, + [39, 0, 0]>, InstrItinData], - [71, 1, 1]>, + [71, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1]>, + [2, 0]>, InstrItinData], - [2, 1]>, + [2, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, 
+ [1, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 8, 1, 1]>, + [6, 8, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [16, 1, 1]>, + [16, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [7, 1, 1]>, + [7, 0, 0]>, InstrItinData], - [7, 9, 1, 1]>, + [7, 9, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 8, 1, 1]>, + [6, 8, 0, 0]>, InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], [6]>, InstrItinData], [16]>, InstrItinData], - [16, 1]>, + [16, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], - [4, 1]>, + [4, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], - [4, 1]>, + [4, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], [16]>, InstrItinData], [16]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [72, 1, 1]>, + [72, 0, 0]>, InstrItinData], - [59, 1, 1]>, + [59, 0, 0]>, InstrItinData], - [69, 1, 1]>, + [69, 0, 0]>, InstrItinData], - [6, 1, 1, 1]>, + [6, 0, 0, 0]>, InstrItinData], - [6, 1]> + [6, 0]> ]>; // ===---------------------------------------------------------------------===//