X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocLinearScan.cpp;h=0875257550bb68e26dde70f2cfce531360dc9e40;hb=f8b1a5ea9602bb65a5cf59d3d34f2851a08cdc3e;hp=91dda771475964ce38f0098fa1ccfc7fb99545cf;hpb=ae73dc1448d25b02cabc7c64c86c64371453dda8;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 91dda771475..0875257550b 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -12,11 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "PhysRegTracker.h" +#include "LiveDebugVariables.h" #include "VirtRegMap.h" +#include "VirtRegRewriter.h" +#include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -26,36 +29,87 @@ #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include #include #include #include #include + using namespace llvm; STATISTIC(NumIters , "Number of iterations performed"); STATISTIC(NumBacktracks, "Number of times we had to backtrack"); STATISTIC(NumCoalesce, "Number of copies coalesced"); +STATISTIC(NumDowngrade, "Number of registers downgraded"); static cl::opt NewHeuristic("new-spilling-heuristic", cl::desc("Use new spilling heuristic"), cl::init(false), cl::Hidden); +static cl::opt +PreSplitIntervals("pre-alloc-split", + cl::desc("Pre-register allocation live interval splitting"), + cl::init(false), cl::Hidden); + +static cl::opt +TrivCoalesceEnds("trivial-coalesce-ends", + cl::desc("Attempt trivial coalescing of interval ends"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc -linearscanRegAlloc("linearscan", " linear scan register allocator", +linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); namespace { - struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass { + // When we allocate a register, add it to a fixed-size queue of + // registers to skip in subsequent allocations. This trades a small + // amount of register pressure and increased spills for flexibility in + // the post-pass scheduler. + // + // Note that in a the number of registers used for reloading spills + // will be one greater than the value of this option. + // + // One big limitation of this is that it doesn't differentiate between + // different register classes. So on x86-64, if there is xmm register + // pressure, it can caused fewer GPRs to be held in the queue. + static cl::opt + NumRecentlyUsedRegs("linearscan-skip-count", + cl::desc("Number of registers for linearscan to remember" + "to skip."), + cl::init(0), + cl::Hidden); + + struct RALinScan : public MachineFunctionPass { static char ID; - RALinScan() : MachineFunctionPass(&ID) {} + RALinScan() : MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup( + *PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + + // Initialize the queue to record recently-used registers. + if (NumRecentlyUsedRegs > 0) + RecentRegs.resize(NumRecentlyUsedRegs, 0); + RecentNext = RecentRegs.begin(); + } typedef std::pair IntervalPtr; typedef SmallVector IntervalPtrs; @@ -66,16 +120,28 @@ namespace { EquivalenceClasses RelatedRegClasses; DenseMap OneClassForEachPhysReg; + // NextReloadMap - For each register in the map, it maps to the another + // register which is defined by a reload from the same stack slot and + // both reloads are in the same basic block. + DenseMap NextReloadMap; + + // DowngradedRegs - A set of registers which are being "downgraded", i.e. + // un-favored for allocation. + SmallSet DowngradedRegs; + + // DowngradeMap - A map from virtual registers to physical registers being + // downgraded for the virtual registers. + DenseMap DowngradeMap; + MachineFunction* mf_; MachineRegisterInfo* mri_; const TargetMachine* tm_; const TargetRegisterInfo* tri_; const TargetInstrInfo* tii_; - MachineRegisterInfo *reginfo_; BitVector allocatableRegs_; + BitVector reservedRegs_; LiveIntervals* li_; - LiveStacks* ls_; - const MachineLoopInfo *loopInfo; + MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is uses for backtracking. @@ -97,24 +163,60 @@ namespace { SmallVector, greater_ptr > IntervalHeap; IntervalHeap unhandled_; - std::auto_ptr prt_; - std::auto_ptr vrm_; + + /// regUse_ - Tracks register usage. + SmallVector regUse_; + SmallVector regUseBackUp_; + + /// vrm_ - Tracks register assignments. + VirtRegMap* vrm_; + + std::auto_ptr rewriter_; + std::auto_ptr spiller_; + // The queue of recently-used registers. + SmallVector RecentRegs; + SmallVector::iterator RecentNext; + + // Record that we just picked this register. + void recordRecentlyUsed(unsigned reg) { + assert(reg != 0 && "Recently used register is NOREG!"); + if (!RecentRegs.empty()) { + *RecentNext++ = reg; + if (RecentNext == RecentRegs.end()) + RecentNext = RecentRegs.begin(); + } + } + public: virtual const char* getPassName() const { return "Linear Scan Register Allocator"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); // Make sure PassManager knows which analyses to make available // to coalescing and which analyses coalescing invalidates. AU.addRequiredTransitive(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + if (PreSplitIntervals) + AU.addRequiredID(PreAllocSplittingID); + AU.addRequiredID(LiveStacksID); + AU.addPreservedID(LiveStacksID); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -122,6 +224,12 @@ namespace { /// runOnMachineFunction - register allocate the whole function bool runOnMachineFunction(MachineFunction&); + // Determine if we skip this register due to its being recently used. + bool isRecentlyUsed(unsigned reg) const { + return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != + RecentRegs.end(); + } + private: /// linearScan - the linear scan algorithm void linearScan(); @@ -132,16 +240,30 @@ namespace { /// processActiveIntervals - expire old intervals and move non-overlapping /// ones to the inactive list. - void processActiveIntervals(unsigned CurPoint); + void processActiveIntervals(SlotIndex CurPoint); /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. - void processInactiveIntervals(unsigned CurPoint); + void processInactiveIntervals(SlotIndex CurPoint); + + /// hasNextReloadInterval - Return the next liveinterval that's being + /// defined by a reload from the same SS as the specified one. + LiveInterval *hasNextReloadInterval(LiveInterval *cur); + + /// DowngradeRegister - Downgrade a register for allocation. + void DowngradeRegister(LiveInterval *li, unsigned Reg); + + /// UpgradeRegister - Upgrade a register for allocation. + void UpgradeRegister(unsigned Reg); /// assignRegOrStackSlotAtInterval - assign a register if one /// is available, or spill. void assignRegOrStackSlotAtInterval(LiveInterval* cur); + void updateSpillWeights(std::vector &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC); + /// findIntervalsToSpill - Determine the intervals to spill for the /// specified interval. It's passed the physical registers whose spill /// weight is the lowest among all the registers whose live intervals @@ -152,60 +274,145 @@ namespace { SmallVector &SpillIntervals); /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try allocate the definition the same register as the source register - /// if the register is not defined during live time of the interval. This - /// eliminate a copy. This is used to coalesce copies which were not + /// try to allocate the definition to the same register as the source, + /// if the register is not defined during the life time of the interval. + /// This eliminates a copy, and is used to coalesce copies which were not /// coalesced away before allocation either due to dest and src being in /// different register classes or because the coalescer was overly /// conservative. unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); /// - /// register handling helpers + /// Register usage / availability tracking helpers. + /// + + void initRegUses() { + regUse_.resize(tri_->getNumRegs(), 0); + regUseBackUp_.resize(tri_->getNumRegs(), 0); + } + + void finalizeRegUses() { +#ifndef NDEBUG + // Verify all the registers are "freed". + bool Error = false; + for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { + if (regUse_[i] != 0) { + dbgs() << tri_->getName(i) << " is still in use!\n"; + Error = true; + } + } + if (Error) + llvm_unreachable(0); +#endif + regUse_.clear(); + regUseBackUp_.clear(); + } + + void addRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + ++regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) + ++regUse_[*as]; + } + + void delRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + assert(regUse_[physReg] != 0); + --regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { + assert(regUse_[*as] != 0); + --regUse_[*as]; + } + } + + bool isRegAvail(unsigned physReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + return regUse_[physReg] == 0; + } + + void backUpRegUses() { + regUseBackUp_ = regUse_; + } + + void restoreRegUses() { + regUse_ = regUseBackUp_; + } + + /// + /// Register handling helpers. /// /// getFreePhysReg - return a free physical register for this virtual /// register interval if we have one, otherwise return 0. unsigned getFreePhysReg(LiveInterval* cur); - - /// assignVirt2StackSlot - assigns this virtual register to a - /// stack slot. returns the stack slot - int assignVirt2StackSlot(unsigned virtReg); + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector &inactiveCounts, + bool SkipDGRegs); + + /// getFirstNonReservedPhysReg - return the first non-reserved physical + /// register in the register class. + unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { + TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); + while (i != aoe && reservedRegs_.test(*i)) + ++i; + assert(i != aoe && "All registers reserved?!"); + return *i; + } void ComputeRelatedRegClasses(); template void printIntervals(const char* const str, ItTy i, ItTy e) const { - if (str) DOUT << str << " intervals:\n"; - for (; i != e; ++i) { - DOUT << "\t" << *i->first << " -> "; - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) { - reg = vrm_->getPhys(reg); - } - DOUT << tri_->getName(reg) << '\n'; - } + DEBUG({ + if (str) + dbgs() << str << " intervals:\n"; + + for (; i != e; ++i) { + dbgs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + dbgs() << tri_->getName(reg) << '\n'; + } + }); } }; char RALinScan::ID = 0; } -static RegisterPass -X("linearscan-regalloc", "Linear Scan Register Allocator"); +INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) +INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { - const TargetRegisterInfo &TRI = *tri_; - // First pass, add all reg classes to the union, and determine at least one // reg class that each register is in. bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = TRI.regclass_begin(), - E = TRI.regclass_end(); RCI != E; ++RCI) { + for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), + E = tri_->regclass_end(); RCI != E; ++RCI) { RelatedRegClasses.insert(*RCI); for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); I != E; ++I) { - HasAliases = HasAliases || *TRI.getAliasSet(*I) != 0; - + HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; if (PRC) { // Already processed this register. Just make sure we know that @@ -216,7 +423,7 @@ void RALinScan::ComputeRelatedRegClasses() { } } } - + // Second pass, now that we know conservatively what register classes each reg // belongs to, add info about aliases. We don't need to do this for targets // without register aliases. @@ -224,52 +431,74 @@ void RALinScan::ComputeRelatedRegClasses() { for (DenseMap::iterator I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); I != E; ++I) - for (const unsigned *AS = TRI.getAliasSet(I->first); *AS; ++AS) + for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); } -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, -/// try allocate the definition the same register as the source register -/// if the register is not defined during live time of the interval. This -/// eliminate a copy. This is used to coalesce copies which were not -/// coalesced away before allocation either due to dest and src being in -/// different register classes or because the coalescer was overly -/// conservative. +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try +/// allocate the definition the same register as the source register if the +/// register is not defined during live time of the interval. If the interval is +/// killed by a copy, try to use the destination register. This eliminates a +/// copy. This is used to coalesce copies which were not coalesced away before +/// allocation either due to dest and src being in different register classes or +/// because the coalescer was overly conservative. unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue()) + unsigned Preference = vrm_->getRegAllocPref(cur.reg); + if ((Preference && Preference == Reg) || !cur.containsOneValue()) return Reg; - VNInfo *vni = cur.getValNumInfo(0); - if (!vni->def || vni->def == ~1U || vni->def == ~0U) + // We cannot handle complicated live ranges. Simple linear stuff only. + if (cur.ranges.size() != 1) return Reg; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg; - if (!CopyMI || !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg)) + + const LiveRange &range = cur.ranges.front(); + + VNInfo *vni = range.valno; + if (vni->isUnused()) return Reg; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { - if (!vrm_->isAssignedReg(SrcReg)) - return Reg; + + unsigned CandReg; + { + MachineInstr *CopyMI; + if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) + // Defined by a copy, try to extend SrcReg forward + CandReg = CopyMI->getOperand(1).getReg(); + else if (TrivCoalesceEnds && + (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && + CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) + // Only used by a copy, try to extend DstReg backwards + CandReg = CopyMI->getOperand(0).getReg(); else - SrcReg = vrm_->getPhys(SrcReg); + return Reg; + + // If the target of the copy is a sub-register then don't coalesce. + if(CopyMI->getOperand(0).getSubReg()) + return Reg; + } + + if (TargetRegisterInfo::isVirtualRegister(CandReg)) { + if (!vrm_->isAssignedReg(CandReg)) + return Reg; + CandReg = vrm_->getPhys(CandReg); } - if (Reg == SrcReg) + if (Reg == CandReg) return Reg; - const TargetRegisterClass *RC = reginfo_->getRegClass(cur.reg); - if (!RC->contains(SrcReg)) + const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); + if (!RC->contains(CandReg)) + return Reg; + + if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) return Reg; // Try to coalesce. - if (!li_->conflictsWithPhysRegDef(cur, *vrm_, SrcReg)) { - DOUT << "Coalescing: " << cur << " -> " << tri_->getName(SrcReg) - << '\n'; - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, SrcReg); - ++NumCoalesce; - return SrcReg; - } + DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) + << '\n'); + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, CandReg); - return Reg; + ++NumCoalesce; + return CandReg; } bool RALinScan::runOnMachineFunction(MachineFunction &fn) { @@ -278,10 +507,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { tm_ = &fn.getTarget(); tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); - reginfo_ = &mf_->getRegInfo(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis(); - ls_ = &getAnalysis(); loopInfo = &getAnalysis(); // We don't run the coalescer here because we have no reason to @@ -292,24 +520,37 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { // If this is the first function compiled, compute the related reg classes. if (RelatedRegClasses.empty()) ComputeRelatedRegClasses(); - - if (!prt_.get()) prt_.reset(new PhysRegTracker(*tri_)); - vrm_.reset(new VirtRegMap(*mf_)); - if (!spiller_.get()) spiller_.reset(createSpiller()); + + // Also resize register usage trackers. + initRegUses(); + + vrm_ = &getAnalysis(); + if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); + + spiller_.reset(createSpiller(*this, *mf_, *vrm_)); initIntervalSets(); linearScan(); // Rewrite spill code and update the PhysRegsUsed set. - spiller_->runOnMachineFunction(*mf_, *vrm_); - vrm_.reset(); // Free the VirtRegMap + rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); + + // Write out new DBG_VALUE instructions. + getAnalysis().emitDebugValues(vrm_); assert(unhandled_.empty() && "Unhandled live intervals remain!"); + + finalizeRegUses(); + fixed_.clear(); active_.clear(); inactive_.clear(); handled_.clear(); + NextReloadMap.clear(); + DowngradedRegs.clear(); + DowngradeMap.clear(); + spiller_.reset(0); return true; } @@ -326,61 +567,73 @@ void RALinScan::initIntervalSets() for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - reginfo_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } else - unhandled_.push(i->second); + if (!i->second->empty()) { + mri_->setPhysRegUsed(i->second->reg); + fixed_.push_back(std::make_pair(i->second, i->second->begin())); + } + } else { + if (i->second->empty()) { + assignRegOrStackSlotAtInterval(i->second); + } + else + unhandled_.push(i->second); + } } } -void RALinScan::linearScan() -{ +void RALinScan::linearScan() { // linear scan algorithm - DOUT << "********** LINEAR SCAN **********\n"; - DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; - - DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + DEBUG({ + dbgs() << "********** LINEAR SCAN **********\n" + << "********** Function: " + << mf_->getFunction()->getName() << '\n'; + printIntervals("fixed", fixed_.begin(), fixed_.end()); + }); while (!unhandled_.empty()) { // pick the interval with the earliest start point LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - if (!cur->empty()) { - processActiveIntervals(cur->beginNumber()); - processInactiveIntervals(cur->beginNumber()); + assert(!cur->empty() && "Empty interval in unhandled set."); - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - } + processActiveIntervals(cur->beginIndex()); + processInactiveIntervals(cur->beginIndex()); + + assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && + "Can only allocate virtual registers!"); // Allocating a virtual register. try to find a free // physical register or spill an interval (possibly this one) in order to // assign it one. assignRegOrStackSlotAtInterval(cur); - DEBUG(printIntervals("active", active_.begin(), active_.end())); - DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + DEBUG({ + printIntervals("active", active_.begin(), active_.end()); + printIntervals("inactive", inactive_.begin(), inactive_.end()); + }); } - // expire any remaining active intervals + // Expire any remaining active intervals while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DOUT << "\tinterval " << *IP.first << " expired\n"; + DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - prt_->delRegUse(reg); + delRegUse(reg); active_.pop_back(); } - // expire any remaining inactive intervals - DEBUG(for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - DOUT << "\tinterval " << *i->first << " expired\n"); + // Expire any remaining inactive intervals + DEBUG({ + for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + dbgs() << "\tinterval " << *i->first << " expired\n"; + }); inactive_.clear(); // Add live-ins to every BB except for entry. Also perform trivial coalescing. @@ -399,26 +652,35 @@ void RALinScan::linearScan() // Ignore splited live intervals. if (!isPhys && vrm_->getPreSplitReg(cur.reg)) continue; + for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); I != E; ++I) { const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR, LiveInMBBs)) { + if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) + if (LiveInMBBs[i] != EntryMBB) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Adding a virtual register to livein set?"); LiveInMBBs[i]->addLiveIn(Reg); + } LiveInMBBs.clear(); } } } - DOUT << *vrm_; + DEBUG(dbgs() << *vrm_); + + // Look for physical registers that end up not being allocated even though + // register allocator had to spill other registers in its register class. + if (!vrm_->FindUnusedRegisters(li_)) + return; } /// processActiveIntervals - expire old intervals and move non-overlapping ones /// to the inactive list. -void RALinScan::processActiveIntervals(unsigned CurPoint) +void RALinScan::processActiveIntervals(SlotIndex CurPoint) { - DOUT << "\tprocessing active intervals:\n"; + DEBUG(dbgs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -428,11 +690,11 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - prt_->delRegUse(reg); + delRegUse(reg); // Pop off the end of the list. active_[i] = active_.back(); @@ -441,11 +703,11 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DOUT << "\t\tinterval " << *Interval << " inactive\n"; + DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - prt_->delRegUse(reg); + delRegUse(reg); // add to inactive. inactive_.push_back(std::make_pair(Interval, IntervalPos)); @@ -462,9 +724,9 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. -void RALinScan::processInactiveIntervals(unsigned CurPoint) +void RALinScan::processInactiveIntervals(SlotIndex CurPoint) { - DOUT << "\tprocessing inactive intervals:\n"; + DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -474,7 +736,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -482,11 +744,11 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DOUT << "\t\tinterval " << *Interval << " active\n"; + DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - prt_->addRegUse(reg); + addRegUse(reg); // add to active active_.push_back(std::make_pair(Interval, IntervalPos)); @@ -503,12 +765,35 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) /// updateSpillWeights - updates the spill weights of the specifed physical /// register and its weight. -static void updateSpillWeights(std::vector &Weights, - unsigned reg, float weight, - const TargetRegisterInfo *TRI) { +void RALinScan::updateSpillWeights(std::vector &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC) { + SmallSet Processed; + SmallSet SuperAdded; + SmallVector Supers; Weights[reg] += weight; - for (const unsigned* as = TRI->getAliasSet(reg); *as; ++as) + Processed.insert(reg); + for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { Weights[*as] += weight; + Processed.insert(*as); + if (tri_->isSubRegister(*as, reg) && + SuperAdded.insert(*as) && + RC->contains(*as)) { + Supers.push_back(*as); + } + } + + // If the alias is a super-register, and the super-register is in the + // register class we are trying to allocate. Then add the weight to all + // sub-registers of the super-register even if they are not aliases. + // e.g. allocating for GR32, bh is not used, updating bl spill weight. + // bl should get the same spill weight otherwise it will be choosen + // as a spill candidate since spilling bh doesn't make ebx available. + for (unsigned i = 0, e = Supers.size(); i != e; ++i) { + for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) + if (!Processed.count(*sr)) + Weights[*sr] += weight; + } } static @@ -520,7 +805,8 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, + SlotIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -530,41 +816,19 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ } } -/// addStackInterval - Create a LiveInterval for stack if the specified live -/// interval has been spilled. -static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, - LiveIntervals *li_, float &Weight, - VirtRegMap &vrm_) { - int SS = vrm_.getStackSlot(cur->reg); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - LiveInterval &SI = ls_->getOrCreateInterval(SS); - SI.weight += Weight; - - VNInfo *VNI; - if (SI.getNumValNums()) - VNI = SI.getValNumInfo(0); - else - VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator()); - - LiveInterval &RI = li_->getInterval(cur->reg); - // FIXME: This may be overly conservative. - SI.MergeRangesInAsValue(RI, VNI); -} - /// getConflictWeight - Return the number of conflicts between cur /// live interval and defs and uses of Reg weighted by loop depthes. -static float getConflictWeight(LiveInterval *cur, unsigned Reg, - LiveIntervals *li_, - MachineRegisterInfo *mri_, - const MachineLoopInfo *loopInfo) { +static +float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, + MachineRegisterInfo *mri_, + MachineLoopInfo *loopInfo) { float Conflicts = 0; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), E = mri_->reg_end(); I != E; ++I) { MachineInstr *MI = &*I; if (cur->liveAt(li_->getInstructionIndex(MI))) { unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += powf(10.0f, (float)loopDepth); + Conflicts += std::pow(10.0f, (float)loopDepth); } } return Conflicts; @@ -584,11 +848,13 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; SmallVector SLIs[3]; - DOUT << "\tConsidering " << NumCands << " candidates: "; - DEBUG(for (unsigned i = 0; i != NumCands; ++i) - DOUT << tri_->getName(Candidates[i].first) << " "; - DOUT << "\n";); - + DEBUG({ + dbgs() << "\tConsidering " << NumCands << " candidates: "; + for (unsigned i = 0; i != NumCands; ++i) + dbgs() << tri_->getName(Candidates[i].first) << " "; + dbgs() << "\n"; + }); + // Calculate the number of conflicts of each candidate. for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { unsigned Reg = i->first->reg; @@ -636,9 +902,15 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, namespace { struct WeightCompare { + private: + const RALinScan &Allocator; + + public: + WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} + typedef std::pair RegWeightPair; bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second; + return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); } }; } @@ -653,70 +925,115 @@ static bool weightsAreClose(float w1, float w2) { return (diff / w2) <= 0.05f; // Within 5%. } +LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { + DenseMap::iterator I = NextReloadMap.find(cur->reg); + if (I == NextReloadMap.end()) + return 0; + return &li_->getInterval(I->second); +} + +void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { + bool isNew = DowngradedRegs.insert(Reg); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, Reg)); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { + isNew = DowngradedRegs.insert(*AS); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, *AS)); + } + ++NumDowngrade; +} + +void RALinScan::UpgradeRegister(unsigned Reg) { + if (Reg) { + DowngradedRegs.erase(Reg); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) + DowngradedRegs.erase(*AS); + } +} + +namespace { + struct LISorter { + bool operator()(LiveInterval* A, LiveInterval* B) { + return A->beginIndex() < B->beginIndex(); + } + }; +} + /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) -{ - DOUT << "\tallocating current interval: "; +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + DEBUG(dbgs() << "\tallocating current interval from " + << RC->getName() << ": "); // This is an implicitly defined live interval, just assign any register. - const TargetRegisterClass *RC = reginfo_->getRegClass(cur->reg); if (cur->empty()) { - unsigned physReg = cur->preference; + unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) - physReg = *RC->allocation_order_begin(*mf_); - DOUT << tri_->getName(physReg) << '\n'; + physReg = getFirstNonReservedPhysReg(RC); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; } - PhysRegTracker backupPrt = *prt_; + backUpRegUses(); std::vector > SpillWeightsToAdd; - unsigned StartPosition = cur->beginNumber(); + SlotIndex StartPosition = cur->beginIndex(); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - // If this live interval is defined by a move instruction and its source is - // assigned a physical register that is compatible with the target register - // class, then we should try to assign it the same register. + // If start of this live interval is defined by a move instruction and its + // source is assigned a physical register that is compatible with the target + // register class, then we should try to assign it the same register. // This can happen when the move is from a larger register class to a smaller // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!cur->preference && cur->containsOneValue()) { - VNInfo *vni = cur->getValNumInfo(0); - if (vni->def && vni->def != ~1U && vni->def != ~0U) { + if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { + VNInfo *vni = cur->begin()->valno; + if (!vni->isUnused()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg; - if (CopyMI && tii_->isMoveInstr(*CopyMI, SrcReg, DstReg)) { + if (CopyMI && CopyMI->isCopy()) { + unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); + unsigned SrcReg = CopyMI->getOperand(1).getReg(); + unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); unsigned Reg = 0; if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) Reg = SrcReg; else if (vrm_->isAssignedReg(SrcReg)) Reg = vrm_->getPhys(SrcReg); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - cur->preference = Reg; + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + mri_->setRegAllocationHint(cur->reg, 0, Reg); + } } } } - // for every interval in inactive we overlap with, mark the + // For every interval in inactive we overlap with, mark the // register as not free and update spill weights. for (IntervalPtrs::const_iterator i = inactive_.begin(), e = inactive_.end(); i != e; ++i) { unsigned Reg = i->first->reg; assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = reginfo_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, + const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); + // If this is not in a related reg class to the register we're allocating, // don't check it. if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && cur->overlapsFrom(*i->first, i->second-1)) { Reg = vrm_->getPhys(Reg); - prt_->addRegUse(Reg); + addRegUse(Reg); SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); } } - + // Speculatively check to see if we can get a register right now. If not, // we know we won't be able to by adding more constraints. If so, we can // check to see if it is valid. Doing an exhaustive search of the fixed_ list @@ -731,7 +1048,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) SmallSet RegAliases; for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) RegAliases.insert(*AS); - + bool ConflictsWithFixed = false; for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { IntervalPtr &IP = fixed_[i]; @@ -739,7 +1056,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // Okay, this reg is on the fixed list. Check to see if we actually // conflict. LiveInterval *I = IP.first; - if (I->endNumber() > StartPosition) { + if (I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -751,10 +1068,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } } } - + // Okay, the register picked by our speculative getFreePhysReg call turned // out to be in use. Actually add all of the conflicting fixed registers to - // prt so we can do an accurate query. + // regUse_ so we can do an accurate query. if (ConflictsWithFixed) { // For every interval in fixed we overlap with, mark the register as not // free and update spill weights. @@ -763,49 +1080,58 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *I = IP.first; const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endNumber() > StartPosition) { + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) --II; if (cur->overlapsFrom(*I, II)) { unsigned reg = I->reg; - prt_->addRegUse(reg); + addRegUse(reg); SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); } } } - // Using the newly updated prt_ object, which includes conflicts in the + // Using the newly updated regUse_ object, which includes conflicts in the // future, see if there are any registers available. physReg = getFreePhysReg(cur); } } - + // Restore the physical register tracker, removing information about the // future. - *prt_ = backupPrt; - - // if we find a free register, we are done: assign this virtual to + restoreRegUses(); + + // If we find a free register, we are done: assign this virtual to // the free physical register and add this interval to the active // list. if (physReg) { - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); - prt_->addRegUse(physReg); + addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + + // "Upgrade" the physical register since it has been allocated. + UpgradeRegister(physReg); + if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { + // "Downgrade" physReg to try to keep physReg from being allocated until + // the next reload from the same SS is allocated. + mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); + DowngradeRegister(cur, physReg); + } return; } - DOUT << "no free registers\n"; + DEBUG(dbgs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. std::vector SpillWeights(tri_->getNumRegs(), 0.0f); for (std::vector >::iterator I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, tri_); - + updateSpillWeights(SpillWeights, I->first, I->second, RC); + // for each interval in active, update spill weights. for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); i != e; ++i) { @@ -813,14 +1139,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, tri_); + updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - - DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + + DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. float minWeight = HUGE_VALF; - unsigned minReg = 0; /*cur->preference*/; // Try the preferred register first. + unsigned minReg = 0; bool Found = false; std::vector > RegsWeights; @@ -829,16 +1155,22 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; float regWeight = SpillWeights[reg]; - if (minWeight > regWeight) + // Don't even consider reserved regs. + if (reservedRegs_.test(reg)) + continue; + // Skip recently allocated registers and reserved registers. + if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; RegsWeights.push_back(std::make_pair(reg, regWeight)); } - + // If we didn't find a register that is spillable, try aliases? if (!Found) { for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; + if (reservedRegs_.test(reg)) + continue; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -847,16 +1179,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare()); + std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); minReg = RegsWeights[0].first; minWeight = RegsWeights[0].second; if (minWeight == HUGE_VALF) { // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_); + minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) + li_->getApproximateInstructionCount(*cur) == 0) { // Spill a physical register around defs and uses. - li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_); + if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { + // spillPhysRegAroundRegDefsUses may have invalidated iterator stored + // in fixed_. Reset them. + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + LiveInterval *I = IP.first; + if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) + IP.second = I->advanceTo(I->begin(), StartPosition); + } + + DowngradedRegs.clear(); + assignRegOrStackSlotAtInterval(cur); + } else { + assert(false && "Ran out of registers during register allocation!"); + report_fatal_error("Ran out of registers during register allocation!"); + } + return; + } } // Find up to 3 registers to consider as spill candidates. @@ -867,34 +1216,61 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) --LastCandidate; } - DOUT << "\t\tregister(s) with min weight(s): "; - DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) - DOUT << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"); + DEBUG({ + dbgs() << "\t\tregister(s) with min weight(s): "; + + for (unsigned i = 0; i != LastCandidate; ++i) + dbgs() << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"; + }); - // if the current has the minimum weight, we need to spill it and + // If the current has the minimum weight, we need to spill it and // add any added intervals back to unhandled, and restart // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DOUT << "\t\t\tspilling(c): " << *cur << '\n'; - float SSWeight; - std::vector added = - li_->addIntervalsForSpills(*cur, loopInfo, *vrm_, SSWeight); - addStackInterval(cur, ls_, li_, SSWeight, *vrm_); + DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); + SmallVector spillIs, added; + spiller_->spill(cur, added, spillIs); + + std::sort(added.begin(), added.end(), LISorter()); if (added.empty()) return; // Early exit if all spills were folded. - // Merge added with unhandled. Note that we know that - // addIntervalsForSpills returns intervals sorted by their starting + // Merge added with unhandled. Note that we have already sorted + // intervals returned by addIntervalsForSpills by their starting // point. - for (unsigned i = 0, e = added.size(); i != e; ++i) - unhandled_.push(added[i]); + // This also update the NextReloadMap. That is, it adds mapping from a + // register defined by a reload from SS to the next reload from SS in the + // same basic block. + MachineBasicBlock *LastReloadMBB = 0; + LiveInterval *LastReload = 0; + int LastReloadSS = VirtRegMap::NO_STACK_SLOT; + for (unsigned i = 0, e = added.size(); i != e; ++i) { + LiveInterval *ReloadLi = added[i]; + if (ReloadLi->weight == HUGE_VALF && + li_->getApproximateInstructionCount(*ReloadLi) == 0) { + SlotIndex ReloadIdx = ReloadLi->beginIndex(); + MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); + int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); + if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { + // Last reload of same SS is in the same MBB. We want to try to + // allocate both reloads the same register and make sure the reg + // isn't clobbered in between if at all possible. + assert(LastReload->beginIndex() < ReloadIdx); + NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); + } + LastReloadMBB = ReloadMBB; + LastReload = ReloadLi; + LastReloadSS = ReloadSS; + } + unhandled_.push(ReloadLi); + } return; } ++NumBacktracks; - // push the current interval back to unhandled since we are going + // Push the current interval back to unhandled since we are going // to re-run at least this iteration. Since we didn't modify it it // should go back right in the front of the list unhandled_.push(cur); @@ -915,28 +1291,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back - unsigned earliestStart = cur->beginNumber(); + assert(!spillIs.empty() && "No spill intervals?"); + SlotIndex earliestStart = spillIs[0]->beginIndex(); // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep // track of the earliest start of all spilled live intervals since this will // mark our rollback point. - std::vector added; + SmallVector added; while (!spillIs.empty()) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DOUT << "\t\t\tspilling(a): " << *sli << '\n'; - earliestStart = std::min(earliestStart, sli->beginNumber()); - float SSWeight; - std::vector newIs = - li_->addIntervalsForSpills(*sli, loopInfo, *vrm_, SSWeight); - addStackInterval(sli, ls_, li_, SSWeight, *vrm_); - std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); + DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); + if (sli->beginIndex() < earliestStart) + earliestStart = sli->beginIndex(); + spiller_->spill(sli, added, spillIs); spilled.insert(sli->reg); } - DOUT << "\t\trolling back to: " << earliestStart << '\n'; + // Include any added intervals in earliestStart. + for (unsigned i = 0, e = added.size(); i != e; ++i) { + SlotIndex SI = added[i]->beginIndex(); + if (SI < earliestStart) + earliestStart = SI; + } + + DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -944,20 +1325,20 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginNumber() < earliestStart) + if (!i->empty() && i->beginIndex() < earliestStart) break; - DOUT << "\t\t\tundo changes for: " << *i << '\n'; + DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or - // inactive to properly update the PhysRegTracker and the VirtRegMap. + // inactive to properly update regUse_ and the VirtRegMap. IntervalPtrs::iterator it; if ((it = FindIntervalInVector(active_, i)) != active_.end()) { active_.erase(it); assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); if (!spilled.count(i->reg)) unhandled_.push(i); - prt_->delRegUse(vrm_->getPhys(i->reg)); + delRegUse(vrm_->getPhys(i->reg)); vrm_->clearVirt(i->reg); } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { inactive_.erase(it); @@ -972,10 +1353,15 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unhandled_.push(i); } - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been undone - // as well. - i->preference = 0; + DenseMap::iterator ii = DowngradeMap.find(i->reg); + if (ii == DowngradeMap.end()) + // It interval has a preference, it must be defined by a copy. Clear the + // preference now since the source interval allocation may have been + // undone as well. + mri_->setRegAllocationHint(i->reg, 0, 0); + else { + UpgradeRegister(ii->second); + } } // Rewind the iterators in the active, inactive, and fixed lists back to the @@ -984,103 +1370,170 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) RevertVectorIteratorsTo(inactive_, earliestStart); RevertVectorIteratorsTo(fixed_, earliestStart); - // scan the rest and undo each interval that expired after t and + // Scan the rest and undo each interval that expired after t and // insert it in active (the next iteration of the algorithm will // put it in inactive if required) for (unsigned i = 0, e = handled_.size(); i != e; ++i) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginNumber())) { - DOUT << "\t\t\tundo changes for: " << *HI << '\n'; + HI->expiredAt(cur->beginIndex())) { + DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - prt_->addRegUse(vrm_->getPhys(HI->reg)); + addRegUse(vrm_->getPhys(HI->reg)); } } - // merge added with unhandled - for (unsigned i = 0, e = added.size(); i != e; ++i) - unhandled_.push(added[i]); -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = reginfo_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = reginfo_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); + // Merge added with unhandled. + // This also update the NextReloadMap. That is, it adds mapping from a + // register defined by a reload from SS to the next reload from SS in the + // same basic block. + MachineBasicBlock *LastReloadMBB = 0; + LiveInterval *LastReload = 0; + int LastReloadSS = VirtRegMap::NO_STACK_SLOT; + std::sort(added.begin(), added.end(), LISorter()); + for (unsigned i = 0, e = added.size(); i != e; ++i) { + LiveInterval *ReloadLi = added[i]; + if (ReloadLi->weight == HUGE_VALF && + li_->getApproximateInstructionCount(*ReloadLi) == 0) { + SlotIndex ReloadIdx = ReloadLi->beginIndex(); + MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); + int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); + if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { + // Last reload of same SS is in the same MBB. We want to try to + // allocate both reloads the same register and make sure the reg + // isn't clobbered in between if at all possible. + assert(LastReload->beginIndex() < ReloadIdx); + NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); + } + LastReloadMBB = ReloadMBB; + LastReload = ReloadLi; + LastReloadSS = ReloadSS; } + unhandled_.push(ReloadLi); } +} +unsigned RALinScan::getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector &inactiveCounts, + bool SkipDGRegs) { unsigned FreeReg = 0; unsigned FreeRegInactiveCount = 0; - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. - if (cur->preference) { - if (prt_->isRegAvail(cur->preference)) { - DOUT << "\t\tassigned the preferred register: " - << tri_->getName(cur->preference) << "\n"; - return cur->preference; - } else - DOUT << "\t\tunable to assign the preferred register: " - << tri_->getName(cur->preference) << "\n"; - } + std::pair Hint = mri_->getRegAllocationHint(cur->reg); + // Resolve second part of the hint (if possible) given the current allocation. + unsigned physReg = Hint.second; + if (physReg && + TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) + physReg = vrm_->getPhys(physReg); - // Scan for the first available register. - TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_); - TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator I, E; + tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_); assert(I != E && "No allocatable register in this register class!"); - for (; I != E; ++I) - if (prt_->isRegAvail(*I)) { - FreeReg = *I; + + // Scan for the first available register. + for (; I != E; ++I) { + unsigned Reg = *I; + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; + // Skip recently allocated registers. + if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; else FreeRegInactiveCount = 0; break; } + } // If there are no free regs, or if this reg has the max inactive count, // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg; - + if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { + // Remember what register we picked so we can skip it next time. + if (FreeReg != 0) recordRecentlyUsed(FreeReg); + return FreeReg; + } + // Continue scanning the registers, looking for the one with the highest // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be // reevaluated now. for (; I != E; ++I) { unsigned Reg = *I; - if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg]) { + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; + if (isRegAvail(Reg) && Reg < inactiveCounts.size() && + FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) break; // We found the one with the max inactive count. } } - + + // Remember what register we picked so we can skip it next time. + recordRecentlyUsed(FreeReg); + return FreeReg; } +/// getFreePhysReg - return a free physical register for this virtual register +/// interval if we have one, otherwise return 0. +unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { + SmallVector inactiveCounts; + unsigned MaxInactiveCount = 0; + + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); + i != e; ++i) { + unsigned reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + + // If this is not in a related reg class to the register we're allocating, + // don't check it. + const TargetRegisterClass *RegRC = mri_->getRegClass(reg); + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { + reg = vrm_->getPhys(reg); + if (inactiveCounts.size() <= reg) + inactiveCounts.resize(reg+1); + ++inactiveCounts[reg]; + MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); + } + } + + // If copy coalescer has assigned a "preferred" register, check if it's + // available first. + unsigned Preference = vrm_->getRegAllocPref(cur->reg); + if (Preference) { + DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); + if (isRegAvail(Preference) && + RC->contains(Preference)) + return Preference; + } + + if (!DowngradedRegs.empty()) { + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; + } + return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); +} + FunctionPass* llvm::createLinearScanRegisterAllocator() { return new RALinScan(); }