[PowerPC] Better scheduling for isel on P7/P8
author Hal Finkel <hfinkel@anl.gov>
Sun, 1 Feb 2015 17:52:16 +0000 (17:52 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sun, 1 Feb 2015 17:52:16 +0000 (17:52 +0000)
isel is actually a cracked instruction on the P7/P8, and must start a dispatch
group. The scheduling model should reflect this so that, when possible, we
don't bunch too many of them together.

Thanks to Bill Schmidt and Pat Haugen for helping to sort this out.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227758 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/PowerPC/PPCInstr64Bit.td
lib/Target/PowerPC/PPCInstrInfo.td
lib/Target/PowerPC/PPCSchedule.td
lib/Target/PowerPC/PPCSchedule440.td
lib/Target/PowerPC/PPCScheduleA2.td
lib/Target/PowerPC/PPCScheduleE500mc.td
lib/Target/PowerPC/PPCScheduleE5500.td
lib/Target/PowerPC/PPCScheduleP7.td
lib/Target/PowerPC/PPCScheduleP8.td
test/CodeGen/PowerPC/p8-isel-sched.ll [new file with mode: 0644]

index 19c33942bf4bdd2515457c4a05ff2c1c5ddc08d5..0ba6ecb5a60bb934ae9095bdb4650a4c59a315b3 100644 (file)
@@ -686,7 +686,7 @@ defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
 let isSelect = 1 in
 def ISEL8   : AForm_4<31, 15,
                      (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
-                     "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+                     "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
                      []>;
 }  // Interpretation64Bit
 }  // hasSideEffects = 0
index 8027e8307fd0358e8612f32885f3b0501afa6031..65a71e4cc5c59633532ffe81ebdb5f3604370fe7 100644 (file)
@@ -2377,7 +2377,7 @@ let PPC970_Unit = 1 in {  // FXU Operations.
   let isSelect = 1 in
   def ISEL  : AForm_4<31, 15,
                      (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
-                     "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+                     "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
                      []>;
 }
 
index 3ae3793f1a8b4a337859ad132ac37e583323e8da..2f3a1f983bce3ef940ab9d04c53e51b44cc36467 100644 (file)
@@ -13,6 +13,7 @@
 def IIC_IntSimple    : InstrItinClass;
 def IIC_IntGeneral   : InstrItinClass;
 def IIC_IntCompare   : InstrItinClass;
+def IIC_IntISEL      : InstrItinClass;
 def IIC_IntDivD      : InstrItinClass;
 def IIC_IntDivW      : InstrItinClass;
 def IIC_IntMFFS      : InstrItinClass;
@@ -217,6 +218,7 @@ include "PPCScheduleE5500.td"
 //    fsub       IIC_FPAddSub
 //    fsubs      IIC_FPGeneral
 //    icbi       IIC_LdStICBI
+//    isel       IIC_IntISEL
 //    isync      IIC_SprISYNC
 //    lbz        IIC_LdStLoad
 //    lbzu       IIC_LdStLoadUpd
index 218fed248a31ff47a1376a814c660895b47c050f..04a43bc03251a45156adc56b89856aa9d79719e8 100644 (file)
@@ -121,6 +121,14 @@ def PPC440Itineraries : ProcessorItineraries<
                                 [2, 0, 0],
                                 [P440_GPR_Bypass,
                                  P440_GPR_Bypass, P440_GPR_Bypass]>,
+  InstrItinData<IIC_IntISEL,    [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+                                 InstrStage<1, [P440_IRACC, P440_LRACC]>,
+                                 InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+                                 InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+                                 InstrStage<1, [P440_IWB, P440_JWB]>],
+                                [2, 0, 0, 0],
+                                [P440_GPR_Bypass,
+                                 P440_GPR_Bypass, P440_GPR_Bypass, NoBypass]>,
   InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
                                  InstrStage<1, [P440_IRACC, P440_LRACC]>,
                                  InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
index 14476963bad03809f6c9a50098cb4147282845bf..21a357a2efcf8421902b93082a782af8de42a163 100644 (file)
@@ -29,6 +29,8 @@ def PPCA2Itineraries : ProcessorItineraries<
                                  [1, 0, 0]>,
   InstrItinData<IIC_IntGeneral,  [InstrStage<1, [A2_XU]>],
                                  [2, 0, 0]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0, 0]>,
   InstrItinData<IIC_IntCompare,  [InstrStage<1, [A2_XU]>],
                                  [2, 0, 0]>,
   InstrItinData<IIC_IntDivW,     [InstrStage<1, [A2_XU]>],
index dab89e3db353d746eafda63cdeeba3850a4b62dd..36b8517dabf1f7a65244cf466eeb44e8c47ad8d2 100644 (file)
@@ -54,6 +54,12 @@ def PPCE500mcItineraries : ProcessorItineraries<
                                  [4, 1, 1], // Latency = 1
                                  [E500_GPR_Bypass,
                                   E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SFX0, E500_SFX1]>],
+                                 [4, 1, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass,
+                                  E500_CR_Bypass]>,
   InstrItinData<IIC_IntCompare,  [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
                                   InstrStage<1, [E500_SFX0, E500_SFX1]>],
                                  [5, 1, 1], // Latency = 1 or 2
index de097d9d8cf560d25ea4d4cf89e962346805882d..7c2693ef0d4fe3c64c5a4f69962e7b3e8d1d7d0f 100644 (file)
@@ -58,6 +58,12 @@ def PPCE5500Itineraries : ProcessorItineraries<
                                  [5, 2, 2], // Latency = 1
                                  [E5500_GPR_Bypass,
                                   E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+                                 [5, 2, 2, 2], // Latency = 1
+                                 [E5500_GPR_Bypass,
+                                  E5500_GPR_Bypass, E5500_GPR_Bypass,
+                                  E5500_CR_Bypass]>,
   InstrItinData<IIC_IntCompare,  [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
                                   InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
                                  [6, 2, 2], // Latency = 1 or 2
index 03693cbeada071ac85182f97a8ed56d8707653e7..635d154d10bf444bb83419281f5b1c02bbeb10bd 100644 (file)
@@ -89,6 +89,10 @@ def P7Itineraries : ProcessorItineraries<
                                                   P7_DU3, P7_DU4], 0>,
                                    InstrStage<1, [P7_FX1, P7_FX2]>],
                                   [1, 1, 1]>,
+  InstrItinData<IIC_IntISEL,      [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2], 0>,
+                                   InstrStage<1, [P7_BRU]>],
+                                  [1, 1, 1, 1]>,
   InstrItinData<IIC_IntCompare  , [InstrStage<1, [P7_DU1, P7_DU2,
                                                   P7_DU3, P7_DU4], 0>,
                                    InstrStage<1, [P7_FX1, P7_FX2]>],
index 07971809c877beb184564e8cb5bafd7d5818b365..020739baec3a6cf632576fd162abbc374096ce71 100644 (file)
@@ -66,6 +66,10 @@ def P8Itineraries : ProcessorItineraries<
                                    InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
                                                   P8_LU2, P8_LSU1, P8_LSU2]>],
                                   [1, 1, 1]>,
+  InstrItinData<IIC_IntISEL,      [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [1, 1, 1, 1]>,
   InstrItinData<IIC_IntCompare  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
diff --git a/test/CodeGen/PowerPC/p8-isel-sched.ll b/test/CodeGen/PowerPC/p8-isel-sched.ll
new file mode 100644 (file)
index 0000000..034fe3c
--- /dev/null
@@ -0,0 +1,33 @@
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(i32* nocapture %r1, i32* nocapture %r2, i32* nocapture %r3, i32* nocapture %r4, i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) #0 {
+entry:
+  %tobool = icmp ne i32 %a, 0
+  %cond = select i1 %tobool, i32 %b, i32 %c
+  store i32 %cond, i32* %r1, align 4
+  %cond5 = select i1 %tobool, i32 %b, i32 %d
+  store i32 %cond5, i32* %r2, align 4
+  %add = add nsw i32 %b, 1
+  %sub = add nsw i32 %d, -2
+  %cond10 = select i1 %tobool, i32 %add, i32 %sub
+  store i32 %cond10, i32* %r3, align 4
+  %add13 = add nsw i32 %b, 3
+  %sub15 = add nsw i32 %d, -5
+  %cond17 = select i1 %tobool, i32 %add13, i32 %sub15
+  store i32 %cond17, i32* %r4, align 4
+  ret void
+}
+
+; Make sure that we don't schedule all of the isels together; they should be
+; intermixed with the adds because each isel starts a new dispatch group.
+; CHECK-LABEL: @foo
+; CHECK: isel
+; CHECK: addi
+; CHECK: isel
+; CHECK: blr
+
+attributes #0 = { nounwind }
+