class LiveVariables;
class MCAsmInfo;
class MachineMemOperand;
-class MachineRegisterInfo;
class MDNode;
class MCInst;
class SDNode;
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
-
- /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
- /// and an use in the current loop, return true if the target considered
- /// it 'high'. This is used by optimization passes such as machine LICM to
- /// determine whether it makes sense to hoist an instruction out even in
- /// high register pressure situation.
- virtual
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- return false;
- }
};
/// TargetInstrInfoImpl - This is the default implementation of
using namespace llvm;
+static cl::opt<bool>
+TrackRegPressure("rp-aware-machine-licm",
+ cl::desc("Register pressure aware machine LICM"),
+ cl::init(false), cl::Hidden);
+
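// This is a hidden, off-by-default flag, so the register pressure heuristics
// guarded by TrackRegPressure below only run when it is enabled explicitly.
// An illustrative invocation (assuming the usual cl::opt handling in llc;
// not part of this patch):
//   llc -rp-aware-machine-licm < foo.ll
// or, through the clang driver, -mllvm -rp-aware-machine-licm.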
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
RegSeen.clear();
RegPressure.clear();
RegLimit.clear();
- BackTrace.clear();
for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
CI->second.clear();
///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
- /// and an use in the current loop, return true if the target considered
- /// it 'high'.
- bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
+ /// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop.
+ int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
if (!Preheader)
return;
- if (IsHeader) {
- // Compute registers which are liveout of preheader.
- RegSeen.clear();
- BackTrace.clear();
- InitRegPressure(Preheader);
- }
+ if (TrackRegPressure) {
+ if (IsHeader) {
+ // Compute registers which are liveout of preheader.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
- // Remember livein register pressure.
- BackTrace.push_back(RegPressure);
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+ }
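// BackTrace keeps one RegPressure snapshot per block on the path from the
// loop preheader down to the block currently being visited; the matching
// pop_back() at the end of HoistRegion unwinds it when this region is done.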
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
MachineInstr *MI = &*MII;
- UpdateRegPressureBefore(MI);
+ if (TrackRegPressure)
+ UpdateRegPressureBefore(MI);
Hoist(MI, Preheader);
- UpdateRegPressureAfter(MI);
+ if (TrackRegPressure)
+ UpdateRegPressureAfter(MI);
MII = NextMII;
}
HoistRegion(Children[I]);
}
- BackTrace.pop_back();
+ if (TrackRegPressure)
+ BackTrace.pop_back();
}
/// InitRegPressure - Find all virtual register references that are liveout of
}
}
-/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
-/// and an use in the current loop, return true if the target considered
-/// it 'high'.
-bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
- unsigned DefIdx, unsigned Reg) {
+/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop.
+int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) {
if (MRI->use_nodbg_empty(Reg))
- return false;
+ // No use? Return an arbitrarily large number!
+ return 300;
+ int Latency = -1;
for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
E = MRI->use_nodbg_end(); I != E; ++I) {
MachineInstr *UseMI = &*I;
if (MOReg != Reg)
continue;
- if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
- return true;
+ int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
+ Latency = std::max(Latency, UseCycle);
}
- // Only look at the first in loop use.
- break;
+ if (Latency != -1)
+ break;
}
- return false;
+ if (Latency == -1)
+ Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
+
+ return Latency;
}
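// In effect: a def with no non-debug uses gets the arbitrary sentinel of 300
// cycles; otherwise the result is the largest per-operand getOperandLatency()
// found for the first use that yields one, falling back to the itinerary's
// operand cycle for the def. The profitability check below treats anything
// above 3 cycles (a provisional limit per the FIXME there) as long latency
// worth hoisting.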
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
if (MI.isImplicitDef())
return true;
- // If the instruction is cheap, only hoist if it is re-materilizable. LICM
- // will increase register pressure. It's probably not worth it if the
- // instruction is cheap.
+ // FIXME: For now, only hoist re-materializable instructions. LICM will
+ // increase register pressure. We want to make sure it doesn't increase
+ // spilling.
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
// these tend to help performance in low register pressure situation. The
// trade off is it may cause spill in high pressure situation. It will end up
// adding a store in the loop preheader. But the reload is no more expensive.
// The side benefit is these loads are frequently CSE'ed.
- if (MI.getDesc().isAsCheapAsAMove()) {
- if (!TII->isTriviallyReMaterializable(&MI, AA))
+ if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !isLoadFromConstantMemory(&MI))
return false;
} else {
- // Estimate register pressure to determine whether to LICM the instruction.
// In low register pressure situation, we can be more aggressive about
// hoisting. Also, favors hoisting long latency instructions even in
// moderately high pressure situation.
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- ++NumHighLatency;
- return true;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ int Cycle = ComputeOperandLatency(MI, i, Reg);
+ if (Cycle > 3) {
+ // FIXME: Target-specific high latency limit?
+ ++NumHighLatency;
+ return true;
+ }
}
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
UseTID, UseIdx, UseAlign);
}
-
-bool ARMBaseInstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
- unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
- if (Subtarget.isCortexA8() &&
- (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
- // CortexA8 VFP instructions are not pipelined.
- return true;
-
- // Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
- if (Latency <= 3)
- return false;
- return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
- UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
-}
unsigned DefIdx, unsigned DefAlign,
const TargetInstrDesc &UseTID,
unsigned UseIdx, unsigned UseAlign) const;
-
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const;
};
static inline
NopInst.setOpcode(X86::NOOP);
}
-bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- switch (DefMI->getOpcode()) {
- default: return false;
- case X86::DIVSDrm:
- case X86::DIVSDrm_Int:
- case X86::DIVSDrr:
- case X86::DIVSDrr_Int:
- case X86::DIVSSrm:
- case X86::DIVSSrm_Int:
- case X86::DIVSSrr:
- case X86::DIVSSrr_Int:
- case X86::SQRTPDm:
- case X86::SQRTPDm_Int:
- case X86::SQRTPDr:
- case X86::SQRTPDr_Int:
- case X86::SQRTPSm:
- case X86::SQRTPSm_Int:
- case X86::SQRTPSr:
- case X86::SQRTPSr_Int:
- case X86::SQRTSDm:
- case X86::SQRTSDm_Int:
- case X86::SQRTSDr:
- case X86::SQRTSDr_Int:
- case X86::SQRTSSm:
- case X86::SQRTSSm_Int:
- case X86::SQRTSSr:
- case X86::SQRTSSr_Int:
- return true;
- }
-}
-
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
-
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const;
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
--- /dev/null
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
+
+define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
+entry:
+ br i1 undef, label %smvp.exit, label %bb.i3
+
+bb.i3: ; preds = %bb.i3, %bb134
+ br i1 undef, label %smvp.exit, label %bb.i3
+
+smvp.exit: ; preds = %bb.i3
+ %0 = fmul double %d1, 2.400000e-03 ; <double> [#uses=2]
+ br i1 undef, label %bb138.preheader, label %bb159
+
+bb138.preheader: ; preds = %smvp.exit
+ br label %bb138
+
+bb138: ; preds = %bb138, %bb138.preheader
+ br i1 undef, label %bb138, label %bb145.loopexit
+
+bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
+ %1 = fmul double %d1, -1.200000e-03 ; <double> [#uses=1]
+ %2 = fadd double %d2, %1 ; <double> [#uses=1]
+ %3 = fmul double %2, %d2 ; <double> [#uses=1]
+ %4 = fsub double 0.000000e+00, %3 ; <double> [#uses=1]
+ br i1 %14, label %phi1.exit, label %bb.i35
+
+bb.i35: ; preds = %bb142
+ %5 = call double @sin(double %15) nounwind readonly ; <double> [#uses=1]
+ %6 = fmul double %5, 0x4031740AFA84AD8A ; <double> [#uses=1]
+ %7 = fsub double 1.000000e+00, undef ; <double> [#uses=1]
+ %8 = fdiv double %7, 6.000000e-01 ; <double> [#uses=1]
+ br label %phi1.exit
+
+phi1.exit: ; preds = %bb.i35, %bb142
+ %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
+ %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
+ %10 = fmul double %.pn, %9 ; <double> [#uses=1]
+ br i1 %14, label %phi0.exit, label %bb.i
+
+bb.i: ; preds = %phi1.exit
+ unreachable
+
+phi0.exit: ; preds = %phi1.exit
+ %11 = fsub double %4, %10 ; <double> [#uses=1]
+ %12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1]
+ store double %12, double* undef, align 4
+ br label %bb142
+
+bb145.loopexit: ; preds = %bb138
+ br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
+
+bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit
+ %13 = fmul double %0, 0x401921FB54442D18 ; <double> [#uses=1]
+ %14 = fcmp ugt double %0, 6.000000e-01 ; <i1> [#uses=2]
+ %15 = fdiv double %13, 6.000000e-01 ; <double> [#uses=1]
+ br label %bb142
+
+bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134
+ unreachable
+
+bb166: ; preds = %bb127
+ unreachable
+}
+
+declare double @sin(double) nounwind readonly
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
; Eventually this should become the default and be moved into machine-licm.ll.
+; FIXME: the vdup should be hoisted out of the loop, 8248029.
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
entry:
; CHECK: t2:
; CHECK: mov.w r3, #1065353216
-; CHECK: vdup.32 q{{.*}}, r3
br i1 undef, label %bb1, label %bb2
bb1:
; CHECK-NEXT: %bb1
+; CHECK: vdup.32 q{{.*}}, r3
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tmp1 = shl i32 %indvar, 2
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
-; Now this test spills one register. But a reload in the loop is cheaper than
-; the divsd so it's a win.
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
-; CHECK: fourn
entry:
br label %bb
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
br i1 %1, label %bb30.loopexit, label %bb
-; CHECK: %bb30.loopexit
-; CHECK: divsd %xmm0
-; CHECK: movsd %xmm0, 16(%esp)
-; CHECK: .align
-; CHECK-NEXT: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037