From eb5a560e0026ddaa33b26675d1d70148a3c0ad82 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 29 Nov 2013 07:04:59 +0000 Subject: [PATCH] Adjust PPC A2 input operand latencies On the PPC A2, instructions are only issued after their input operands are ready. Model this by specifying that input operands are read at dispatch (0 cycles after issue). This changes all input operand latencies from 1 to 0. Significant test-suite performance changes (these are 99.5% confidence intervals on 6 runs for both before and after): speedups: MultiSource/Benchmarks/sim/sim -1.21915% +/- 0.175063% MultiSource/Benchmarks/TSVC/LinearDependence-flt/LinearDependence-flt -1.23946% +/- 1.05133% SingleSource/Benchmarks/Misc/flops-2 -1.24237% +/- 0.681362% MultiSource/Applications/JM/lencod/lencod -1.33992% +/- 0.757498% MultiSource/Benchmarks/TSVC/InductionVariable-flt/InductionVariable-flt -1.51802% +/- 1.21468% MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/GlobalDataFlow-flt -2.18818% +/- 1.28605% MultiSource/Benchmarks/TSVC/Packing-flt/Packing-flt -2.21977% +/- 1.19499% SingleSource/Benchmarks/BenchmarkGame/spectral-norm -2.29822% +/- 0.671871% MultiSource/Benchmarks/TSVC/Packing-dbl/Packing-dbl -2.40975% +/- 0.355931% SingleSource/Benchmarks/Misc/fp-convert -2.41899% +/- 1.04751% MultiSource/Benchmarks/TSVC/Searching-dbl/Searching-dbl -2.50349% +/- 0.126765% SingleSource/Benchmarks/Misc/flops-3 -3.00214% +/- 0.700795% MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/LoopRestructuring-flt -3.56995% +/- 3.2929% MultiSource/Applications/sgefa/sgefa -4.24908% +/- 2.00413% MultiSource/Benchmarks/ASC_Sequoia/IRSmk/IRSmk -18.1294% +/- 3.96489% regressions: MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl 1.03249% +/- 0.178547% MultiSource/Applications/hexxagon/hexxagon 1.16597% +/- 0.285235% MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/IndirectAddressing-flt 1.39576% +/- 1.07855% SingleSource/Benchmarks/Misc-C++/stepanov_v1p2 1.71539% +/- 0.173182% MultiSource/Benchmarks/Fhourstones-3.1/fhourstones3.1 1.90013% +/- 0.866472% MultiSource/Benchmarks/TSVC/Recurrences-dbl/Recurrences-dbl 2.39854% +/- 1.05914% MultiSource/Benchmarks/TSVC/ControlFlow-dbl/ControlFlow-dbl 2.4402% +/- 0.817904% MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/LoopRestructuring-dbl 5.87997% +/- 3.3172% MultiSource/Benchmarks/Trimaran/netbench-crc/netbench-crc 9.02643% +/- 5.79591% MultiSource/Benchmarks/VersaBench/bmm/bmm 10.3517% +/- 1.227% Obviously, there are data points on both sides of this; but I think, overall, this supports making the change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195951 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCScheduleA2.td | 104 ++++++++++++++-------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 509de816aa1..bd95924be8a 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -26,117 +26,117 @@ def A2_FU : FuncUnit; // FI pipeline def PPCA2Itineraries : ProcessorItineraries< [A2_XU, A2_FU], [], [ InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [39, 1, 1]>, + [39, 0, 0]>, InstrItinData], - [71, 1, 1]>, + [71, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1, 1]>, + [2, 0, 0]>, InstrItinData], - [2, 1]>, + [2, 0]>, InstrItinData], - [2, 1]>, + [2, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [1, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 8, 1, 1]>, + [6, 8, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [16, 1, 1]>, + [16, 0, 0]>, InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [7, 1, 1]>, + [7, 0, 0]>, InstrItinData], - [7, 9, 1, 1]>, + [7, 9, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 8, 1, 1]>, + [6, 8, 0, 0]>, InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], - [1, 1, 1]>, + [0, 0, 0]>, InstrItinData], - [2, 1, 1, 1]>, + [2, 0, 0, 0]>, InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], - [82, 1, 1]>, // L2 latency + [82, 0, 0]>, // L2 latency InstrItinData], [6]>, InstrItinData], [16]>, InstrItinData], - [16, 1]>, + [16, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], - [4, 1]>, + [4, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], - [4, 1]>, + [4, 0]>, InstrItinData], - [6, 1]>, + [6, 0]>, InstrItinData], [16]>, InstrItinData], [16]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [6, 1, 1]>, + [6, 0, 0]>, InstrItinData], - [5, 1, 1]>, + [5, 0, 0]>, InstrItinData], - [72, 1, 1]>, + [72, 0, 0]>, InstrItinData], - [59, 1, 1]>, + [59, 0, 0]>, InstrItinData], - [69, 1, 1]>, + [69, 0, 0]>, InstrItinData], - [6, 1, 1, 1]>, + [6, 0, 0, 0]>, InstrItinData], - [6, 1]> + [6, 0]> ]>; // ===---------------------------------------------------------------------===// -- 2.34.1