From 98694138025fdb0cec0cda5727201ad00ded3d63 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 19 Oct 2010 17:14:24 +0000 Subject: [PATCH] Revert r116781 "- Add a hook for target to determine whether an instruction def is", which breaks some nightly tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116816 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetInstrInfo.h | 14 --- lib/CodeGen/MachineLICM.cpp | 88 +++++++++++-------- lib/Target/ARM/ARMBaseInstrInfo.cpp | 20 ----- lib/Target/ARM/ARMBaseInstrInfo.h | 5 -- lib/Target/X86/X86InstrInfo.cpp | 35 -------- lib/Target/X86/X86InstrInfo.h | 5 -- test/CodeGen/ARM/remat.ll | 65 ++++++++++++++ test/CodeGen/Thumb2/machine-licm-vdup.ll | 3 +- test/CodeGen/X86/2008-10-27-CoalescerBug.ll | 10 +-- test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2 +- 10 files changed, 121 insertions(+), 126 deletions(-) create mode 100644 test/CodeGen/ARM/remat.ll diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 936345c5046..6615ee4aacd 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -24,7 +24,6 @@ class InstrItineraryData; class LiveVariables; class MCAsmInfo; class MachineMemOperand; -class MachineRegisterInfo; class MDNode; class MCInst; class SDNode; @@ -626,19 +625,6 @@ public: int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; - - /// hasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. This is used by optimization passes such as machine LICM to - /// determine whether it makes sense to hoist an instruction out even in - /// high register pressure situation. - virtual - bool hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - return false; - } }; /// TargetInstrInfoImpl - This is the default implementation of diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index ae3d9db1dc3..7ecf126ad45 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -43,6 +43,11 @@ using namespace llvm; +static cl::opt +TrackRegPressure("rp-aware-machine-licm", + cl::desc("Register pressure aware machine LICM"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -119,7 +124,6 @@ namespace { RegSeen.clear(); RegPressure.clear(); RegLimit.clear(); - BackTrace.clear(); for (DenseMap >::iterator CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) CI->second.clear(); @@ -167,10 +171,9 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. - bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); + /// ComputeOperandLatency - Compute operand latency between a def of 'Reg' + /// and an use in the current loop. + int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check /// if hoisting an instruction of the given cost matrix can cause high @@ -553,24 +556,28 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { if (!Preheader) return; - if (IsHeader) { - // Compute registers which are liveout of preheader. - RegSeen.clear(); - BackTrace.clear(); - InitRegPressure(Preheader); - } + if (TrackRegPressure) { + if (IsHeader) { + // Compute registers which are liveout of preheader. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + } - // Remember livein register pressure. - BackTrace.push_back(RegPressure); + // Remember livein register pressure. + BackTrace.push_back(RegPressure); + } for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; MachineInstr *MI = &*MII; - UpdateRegPressureBefore(MI); + if (TrackRegPressure) + UpdateRegPressureBefore(MI); Hoist(MI, Preheader); - UpdateRegPressureAfter(MI); + if (TrackRegPressure) + UpdateRegPressureAfter(MI); MII = NextMII; } @@ -584,7 +591,8 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { HoistRegion(Children[I]); } - BackTrace.pop_back(); + if (TrackRegPressure) + BackTrace.pop_back(); } /// InitRegPressure - Find all virtual register references that are liveout of @@ -780,14 +788,15 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { } } -/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop, return true if the target considered -/// it 'high'. -bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, - unsigned DefIdx, unsigned Reg) { +/// ComputeOperandLatency - Compute operand latency between a def of 'Reg' +/// and an use in the current loop. +int MachineLICM::ComputeOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) { if (MRI->use_nodbg_empty(Reg)) - return false; + // No use? Return arbitrary large number! + return 300; + int Latency = -1; for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *UseMI = &*I; @@ -801,15 +810,18 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, if (MOReg != Reg) continue; - if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) - return true; + int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i); + Latency = std::max(Latency, UseCycle); } - // Only look at the first in loop use. - break; + if (Latency != -1) + break; } - return false; + if (Latency == -1) + Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx); + + return Latency; } /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check @@ -843,19 +855,19 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. + // FIXME: For now, only hoist re-materilizable instructions. LICM will + // increase register pressure. We want to make sure it doesn't increase + // spilling. // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting // these tend to help performance in low register pressure situation. The // trade off is it may cause spill in high pressure situation. It will end up // adding a store in the loop preheader. But the reload is no more expensive. // The side benefit is these loads are frequently CSE'ed. - if (MI.getDesc().isAsCheapAsAMove()) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) + if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) { + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !isLoadFromConstantMemory(&MI)) return false; } else { - // Estimate register pressure to determine whether to LICM the instruction. // In low register pressure situation, we can be more aggressive about // hoisting. Also, favors hoisting long latency instructions even in // moderately high pressure situation. @@ -868,9 +880,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; + if (InstrItins && !InstrItins->isEmpty()) { + int Cycle = ComputeOperandLatency(MI, i, Reg); + if (Cycle > 3) { + // FIXME: Target specific high latency limit? + ++NumHighLatency; + return true; + } } const TargetRegisterClass *RC = MRI->getRegClass(Reg); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0b5b2437abc..aca292abea5 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1925,23 +1925,3 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, UseTID, UseIdx, UseAlign); } - -bool ARMBaseInstrInfo:: -hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; - unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; - if (Subtarget.isCortexA8() && - (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) - // CortexA8 VFP instructions are not pipelined. - return true; - - // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); - if (Latency <= 3) - return false; - return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || - UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; -} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b3a832948f0..36be3366d29 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -377,11 +377,6 @@ private: unsigned DefIdx, unsigned DefAlign, const TargetInstrDesc &UseTID, unsigned UseIdx, unsigned UseAlign) const; - - bool hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; }; static inline diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 79d9872a1b2..40ef3dbd0d7 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3152,41 +3152,6 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } -bool X86InstrInfo:: -hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - switch (DefMI->getOpcode()) { - default: return false; - case X86::DIVSDrm: - case X86::DIVSDrm_Int: - case X86::DIVSDrr: - case X86::DIVSDrr_Int: - case X86::DIVSSrm: - case X86::DIVSSrm_Int: - case X86::DIVSSrr: - case X86::DIVSSrr_Int: - case X86::SQRTPDm: - case X86::SQRTPDm_Int: - case X86::SQRTPDr: - case X86::SQRTPDr_Int: - case X86::SQRTPSm: - case X86::SQRTPSm_Int: - case X86::SQRTPSr: - case X86::SQRTPSr_Int: - case X86::SQRTSDm: - case X86::SQRTSDm_Int: - case X86::SQRTSDr: - case X86::SQRTSDr_Int: - case X86::SQRTSSm: - case X86::SQRTSSm_Int: - case X86::SQRTSSr: - case X86::SQRTSSr_Int: - return true; - } -} - namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5060ad836af..e43cfacae50 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -864,11 +864,6 @@ public: unsigned OpNum, const SmallVectorImpl &MOs, unsigned Size, unsigned Alignment) const; - - bool hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll new file mode 100644 index 00000000000..6b86f1a9f36 --- /dev/null +++ b/test/CodeGen/ARM/remat.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization" + +define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind { +entry: + br i1 undef, label %smvp.exit, label %bb.i3 + +bb.i3: ; preds = %bb.i3, %bb134 + br i1 undef, label %smvp.exit, label %bb.i3 + +smvp.exit: ; preds = %bb.i3 + %0 = fmul double %d1, 2.400000e-03 ; [#uses=2] + br i1 undef, label %bb138.preheader, label %bb159 + +bb138.preheader: ; preds = %smvp.exit + br label %bb138 + +bb138: ; preds = %bb138, %bb138.preheader + br i1 undef, label %bb138, label %bb145.loopexit + +bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit + %1 = fmul double %d1, -1.200000e-03 ; [#uses=1] + %2 = fadd double %d2, %1 ; [#uses=1] + %3 = fmul double %2, %d2 ; [#uses=1] + %4 = fsub double 0.000000e+00, %3 ; [#uses=1] + br i1 %14, label %phi1.exit, label %bb.i35 + +bb.i35: ; preds = %bb142 + %5 = call double @sin(double %15) nounwind readonly ; [#uses=1] + %6 = fmul double %5, 0x4031740AFA84AD8A ; [#uses=1] + %7 = fsub double 1.000000e+00, undef ; [#uses=1] + %8 = fdiv double %7, 6.000000e-01 ; [#uses=1] + br label %phi1.exit + +phi1.exit: ; preds = %bb.i35, %bb142 + %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=1] + %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=1] + %10 = fmul double %.pn, %9 ; [#uses=1] + br i1 %14, label %phi0.exit, label %bb.i + +bb.i: ; preds = %phi1.exit + unreachable + +phi0.exit: ; preds = %phi1.exit + %11 = fsub double %4, %10 ; [#uses=1] + %12 = fadd double 0.000000e+00, %11 ; [#uses=1] + store double %12, double* undef, align 4 + br label %bb142 + +bb145.loopexit: ; preds = %bb138 + br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159 + +bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit + %13 = fmul double %0, 0x401921FB54442D18 ; [#uses=1] + %14 = fcmp ugt double %0, 6.000000e-01 ; [#uses=2] + %15 = fdiv double %13, 6.000000e-01 ; [#uses=1] + br label %bb142 + +bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134 + unreachable + +bb166: ; preds = %bb127 + unreachable +} + +declare double @sin(double) nounwind readonly diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll index 2f17e27ab4e..f7494ec7d25 100644 --- a/test/CodeGen/Thumb2/machine-licm-vdup.ll +++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll @@ -2,16 +2,17 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s ; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375. ; Eventually this should become the default and be moved into machine-licm.ll. +; FIXME: the vdup should be hoisted out of the loop, 8248029. define void @t2(i8* %ptr1, i8* %ptr2) nounwind { entry: ; CHECK: t2: ; CHECK: mov.w r3, #1065353216 -; CHECK: vdup.32 q{{.*}}, r3 br i1 undef, label %bb1, label %bb2 bb1: ; CHECK-NEXT: %bb1 +; CHECK: vdup.32 q{{.*}}, r3 %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index 9d144a4be0e..afeb358da57 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -1,9 +1,6 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s -; Now this test spills one register. But a reload in the loop is cheaper than -; the divsd so it's a win. +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills} define fastcc void @fourn(double* %data, i32 %isign) nounwind { -; CHECK: fourn entry: br label %bb @@ -14,11 +11,6 @@ bb: ; preds = %bb, %entry %1 = icmp sgt i32 %0, 2 ; [#uses=1] br i1 %1, label %bb30.loopexit, label %bb -; CHECK: %bb30.loopexit -; CHECK: divsd %xmm0 -; CHECK: movsd %xmm0, 16(%esp) -; CHECK: .align -; CHECK-NEXT: %bb3 bb3: ; preds = %bb30.loopexit, %bb25, %bb3 %2 = load i32* null, align 4 ; [#uses=1] %3 = mul i32 %2, 0 ; [#uses=1] diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 4a97ac35afc..bb01e5afcef 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm} ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s ; rdar://6627786 ; rdar://7792037 -- 2.34.1