X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineLICM.cpp;h=104eacdb96ecaf017d2d55a04eaefd8738dc96e6;hb=bf329b3414bbc24efb6d44952a7cbf66c83bcca1;hp=dfc622d0eda8cb882302101030ee27a10487c796;hpb=d6c23557898b1fe9b21b28023f4133cab84a8b83;p=oota-llvm.git diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index dfc622d0eda..104eacdb96e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,6 +22,10 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -29,17 +33,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt @@ -62,7 +62,7 @@ namespace { class MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; @@ -80,6 +80,14 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + // Exit blocks for CurLoop. + SmallVector ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } + // Track 'estimated' register pressure. SmallSet RegSeen; SmallVector RegPressure; @@ -164,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet &StoredFIs, - SmallVector &Candidates); + SmallVectorImpl &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -182,9 +190,9 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -197,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap &Cost); + bool CanCauseHighRegPressure(DenseMap &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -326,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -348,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -356,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -393,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet &StoredFIs, - SmallVector &Candidates) { + SmallVectorImpl &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -434,8 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegClobbers.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegClobbers.set(*AI); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -454,15 +465,15 @@ void MachineLICM::ProcessMI(MachineInstr *MI, // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - if (PhysRegClobbers.test(*AS)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; PhysRegDefs.set(*AS); } + if (PhysRegClobbers.test(Reg)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -491,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. - const std::vector Blocks = CurLoop->getBlocks(); + const std::vector &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; @@ -506,8 +517,8 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegDefs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegDefs.set(*AI); } SpeculationState = SpeculateUnknown; @@ -529,8 +540,8 @@ void MachineLICM::HoistRegionPostRA() { unsigned Reg = MO.getReg(); if (!Reg) continue; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - TermRegs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + TermRegs.set(*AI); } } @@ -573,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() { /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current /// loop, and make sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { - const std::vector Blocks = CurLoop->getBlocks(); + const std::vector &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; if (!BB->isLiveIn(Reg)) @@ -769,7 +780,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned Reg, unsigned OpIdx, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); + MVT VT = *RC->vt_begin(); if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; @@ -955,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. -bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. +bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -1038,7 +1067,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost, + bool CheapInstr) { for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { if (CI->second <= 0) @@ -1047,8 +1077,14 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost) { unsigned RCId = CI->first; unsigned Limit = RegLimit[RCId]; int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector &RP = BackTrace[i-1]; + SmallVectorImpl &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1094,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector &RP = BackTrace[i]; + SmallVectorImpl &RP = BackTrace[i]; for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; @@ -1109,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. + + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? + DenseMap Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + Cost[RCId] -= RCCost; } + } - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } - // High register pressure situation, only hoist if the instruction is going - // to be remat'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; @@ -1216,11 +1260,11 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (NewOpc == 0) return 0; const MCInstrDesc &MID = TII->get(NewOpc); if (MID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); - MachineFunction &MF = *MI->getParent()->getParent(); SmallVector NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg,