X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocLinearScan.cpp;h=a9444619fa5d239120a79ad663911d361e09af15;hb=b0000c376cf13ed63306622ab9642cfae49f074a;hp=b9ae8ac5a73960849e82bc601d538d7d59a5cb8e;hpb=eb577ba3b815a1fa4627b060dd2345d17abf672d;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index b9ae8ac5a73..a9444619fa5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by the LLVM research group and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -12,59 +12,76 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "PhysRegTracker.h" #include "VirtRegMap.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/SSARegMap.h" -#include "llvm/Target/MRegisterInfo.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" -#include 
"llvm/Support/Visibility.h" +#include "llvm/Support/Compiler.h" #include -#include -#include #include #include #include +#include using namespace llvm; -namespace { +STATISTIC(NumIters , "Number of iterations performed"); +STATISTIC(NumBacktracks, "Number of times we had to backtrack"); +STATISTIC(NumCoalesce, "Number of copies coalesced"); + +static cl::opt +NewHeuristic("new-spilling-heuristic", + cl::desc("Use new spilling heuristic"), + cl::init(false), cl::Hidden); - static Statistic efficiency - ("regalloc", "Ratio of intervals processed over total intervals"); - static Statistic<> NumBacktracks - ("regalloc", "Number of times we had to backtrack"); +static cl::opt +PreSplitIntervals("pre-alloc-split", + cl::desc("Pre-register allocation live interval splitting"), + cl::init(false), cl::Hidden); - static RegisterRegAlloc - linearscanRegAlloc("linearscan", " linear scan register allocator", - createLinearScanRegisterAllocator); +static RegisterRegAlloc +linearscanRegAlloc("linearscan", "linear scan register allocator", + createLinearScanRegisterAllocator); - static unsigned numIterations = 0; - static unsigned numIntervals = 0; +namespace { + struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass { + static char ID; + RALinScan() : MachineFunctionPass(&ID) {} - struct VISIBILITY_HIDDEN RA : public MachineFunctionPass { typedef std::pair IntervalPtr; - typedef std::vector IntervalPtrs; + typedef SmallVector IntervalPtrs; private: /// RelatedRegClasses - This structure is built the first time a function is /// compiled, and keeps track of which register classes have registers that /// belong to multiple classes or have aliases that are in other classes. 
EquivalenceClasses RelatedRegClasses; - std::map OneClassForEachPhysReg; + DenseMap OneClassForEachPhysReg; MachineFunction* mf_; + MachineRegisterInfo* mri_; const TargetMachine* tm_; - const MRegisterInfo* mri_; + const TargetRegisterInfo* tri_; + const TargetInstrInfo* tii_; + BitVector allocatableRegs_; LiveIntervals* li_; - bool *PhysRegsUsed; + LiveStacks* ls_; + const MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is uses for backtracking. @@ -83,7 +100,7 @@ namespace { IntervalPtrs inactive_; typedef std::priority_queue, + SmallVector, greater_ptr > IntervalHeap; IntervalHeap unhandled_; std::auto_ptr prt_; @@ -97,6 +114,18 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + // Make sure PassManager knows which analyses to make available + // to coalescing and which analyses coalescing invalidates. + AU.addRequiredTransitive(); + if (PreSplitIntervals) + AU.addRequiredID(PreAllocSplittingID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -123,6 +152,24 @@ namespace { /// is available, or spill. void assignRegOrStackSlotAtInterval(LiveInterval* cur); + /// findIntervalsToSpill - Determine the intervals to spill for the + /// specified interval. It's passed the physical registers whose spill + /// weight is the lowest among all the registers whose live intervals + /// conflict with the interval. + void findIntervalsToSpill(LiveInterval *cur, + std::vector > &Candidates, + unsigned NumCands, + SmallVector &SpillIntervals); + + /// attemptTrivialCoalescing - If a simple interval is defined by a copy, + /// try allocate the definition the same register as the source register + /// if the register is not defined during live time of the interval. 
This + /// eliminate a copy. This is used to coalesce copies which were not + /// coalesced away before allocation either due to dest and src being in + /// different register classes or because the coalescer was overly + /// conservative. + unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); + /// /// register handling helpers /// @@ -139,31 +186,35 @@ namespace { template void printIntervals(const char* const str, ItTy i, ItTy e) const { - if (str) std::cerr << str << " intervals:\n"; + if (str) DOUT << str << " intervals:\n"; for (; i != e; ++i) { - std::cerr << "\t" << *i->first << " -> "; + DOUT << "\t" << *i->first << " -> "; unsigned reg = i->first->reg; - if (MRegisterInfo::isVirtualRegister(reg)) { + if (TargetRegisterInfo::isVirtualRegister(reg)) { reg = vrm_->getPhys(reg); } - std::cerr << mri_->getName(reg) << '\n'; + DOUT << tri_->getName(reg) << '\n'; } } }; + char RALinScan::ID = 0; } -void RA::ComputeRelatedRegClasses() { - const MRegisterInfo &MRI = *mri_; +static RegisterPass +X("linearscan-regalloc", "Linear Scan Register Allocator"); + +void RALinScan::ComputeRelatedRegClasses() { + const TargetRegisterInfo &TRI = *tri_; // First pass, add all reg classes to the union, and determine at least one // reg class that each register is in. bool HasAliases = false; - for (MRegisterInfo::regclass_iterator RCI = MRI.regclass_begin(), - E = MRI.regclass_end(); RCI != E; ++RCI) { + for (TargetRegisterInfo::regclass_iterator RCI = TRI.regclass_begin(), + E = TRI.regclass_end(); RCI != E; ++RCI) { RelatedRegClasses.insert(*RCI); for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); I != E; ++I) { - HasAliases = HasAliases || *MRI.getAliasSet(*I) != 0; + HasAliases = HasAliases || *TRI.getAliasSet(*I) != 0; const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; if (PRC) { @@ -180,28 +231,79 @@ void RA::ComputeRelatedRegClasses() { // belongs to, add info about aliases. 
We don't need to do this for targets // without register aliases. if (HasAliases) - for (std::map::iterator + for (DenseMap::iterator I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); I != E; ++I) - for (const unsigned *AS = MRI.getAliasSet(I->first); *AS; ++AS) + for (const unsigned *AS = TRI.getAliasSet(I->first); *AS; ++AS) RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); } -bool RA::runOnMachineFunction(MachineFunction &fn) { +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, +/// try allocate the definition the same register as the source register +/// if the register is not defined during live time of the interval. This +/// eliminate a copy. This is used to coalesce copies which were not +/// coalesced away before allocation either due to dest and src being in +/// different register classes or because the coalescer was overly +/// conservative. +unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { + if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue()) + return Reg; + + VNInfo *vni = cur.begin()->valno; + if (!vni->def || vni->def == ~1U || vni->def == ~0U) + return Reg; + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (!CopyMI || + !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return Reg; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!vrm_->isAssignedReg(SrcReg)) + return Reg; + else + SrcReg = vrm_->getPhys(SrcReg); + } + if (Reg == SrcReg) + return Reg; + + const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); + if (!RC->contains(SrcReg)) + return Reg; + + // Try to coalesce. 
+ if (!li_->conflictsWithPhysRegDef(cur, *vrm_, SrcReg)) { + DOUT << "Coalescing: " << cur << " -> " << tri_->getName(SrcReg) + << '\n'; + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, SrcReg); + ++NumCoalesce; + return SrcReg; + } + + return Reg; +} + +bool RALinScan::runOnMachineFunction(MachineFunction &fn) { mf_ = &fn; + mri_ = &fn.getRegInfo(); tm_ = &fn.getTarget(); - mri_ = tm_->getRegisterInfo(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + allocatableRegs_ = tri_->getAllocatableSet(fn); li_ = &getAnalysis(); + ls_ = &getAnalysis(); + loopInfo = &getAnalysis(); + + // We don't run the coalescer here because we have no reason to + // interact with it. If the coalescer requires interaction, it + // won't do anything. If it doesn't require interaction, we assume + // it was run as a separate pass. // If this is the first function compiled, compute the related reg classes. if (RelatedRegClasses.empty()) ComputeRelatedRegClasses(); - PhysRegsUsed = new bool[mri_->getNumRegs()]; - std::fill(PhysRegsUsed, PhysRegsUsed+mri_->getNumRegs(), false); - fn.setUsedPhysRegs(PhysRegsUsed); - - if (!prt_.get()) prt_.reset(new PhysRegTracker(*mri_)); + if (!prt_.get()) prt_.reset(new PhysRegTracker(*tri_)); vrm_.reset(new VirtRegMap(*mf_)); if (!spiller_.get()) spiller_.reset(createSpiller()); @@ -211,11 +313,9 @@ bool RA::runOnMachineFunction(MachineFunction &fn) { // Rewrite spill code and update the PhysRegsUsed set. spiller_->runOnMachineFunction(*mf_, *vrm_); - vrm_.reset(); // Free the VirtRegMap - - while (!unhandled_.empty()) unhandled_.pop(); + assert(unhandled_.empty() && "Unhandled live intervals remain!"); fixed_.clear(); active_.clear(); inactive_.clear(); @@ -226,45 +326,45 @@ bool RA::runOnMachineFunction(MachineFunction &fn) { /// initIntervalSets - initialize the interval sets. 
/// -void RA::initIntervalSets() +void RALinScan::initIntervalSets() { assert(unhandled_.empty() && fixed_.empty() && active_.empty() && inactive_.empty() && "interval sets should be empty on initialization"); + handled_.reserve(li_->getNumIntervals()); + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (MRegisterInfo::isPhysicalRegister(i->second.reg)) { - PhysRegsUsed[i->second.reg] = true; - fixed_.push_back(std::make_pair(&i->second, i->second.begin())); + if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { + mri_->setPhysRegUsed(i->second->reg); + fixed_.push_back(std::make_pair(i->second, i->second->begin())); } else - unhandled_.push(&i->second); + unhandled_.push(i->second); } } -void RA::linearScan() +void RALinScan::linearScan() { // linear scan algorithm - DEBUG(std::cerr << "********** LINEAR SCAN **********\n"); - DEBUG(std::cerr << "********** Function: " - << mf_->getFunction()->getName() << '\n'); + DOUT << "********** LINEAR SCAN **********\n"; + DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; - // DEBUG(printIntervals("unhandled", unhandled_.begin(), unhandled_.end())); DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); - DEBUG(printIntervals("active", active_.begin(), active_.end())); - DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); while (!unhandled_.empty()) { // pick the interval with the earliest start point LiveInterval* cur = unhandled_.top(); unhandled_.pop(); - ++numIterations; - DEBUG(std::cerr << "\n*** CURRENT ***: " << *cur << '\n'); + ++NumIters; + DOUT << "\n*** CURRENT ***: " << *cur << '\n'; - processActiveIntervals(cur->beginNumber()); - processInactiveIntervals(cur->beginNumber()); + if (!cur->empty()) { + processActiveIntervals(cur->beginNumber()); + processInactiveIntervals(cur->beginNumber()); - assert(MRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); + 
assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && + "Can only allocate virtual registers!"); + } // Allocating a virtual register. try to find a free // physical register or spill an interval (possibly this one) in order to @@ -274,36 +374,61 @@ void RA::linearScan() DEBUG(printIntervals("active", active_.begin(), active_.end())); DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); } - numIntervals += li_->getNumIntervals(); - efficiency = double(numIterations) / double(numIntervals); // expire any remaining active intervals - for (IntervalPtrs::reverse_iterator - i = active_.rbegin(); i != active_.rend(); ) { - unsigned reg = i->first->reg; - DEBUG(std::cerr << "\tinterval " << *i->first << " expired\n"); - assert(MRegisterInfo::isVirtualRegister(reg) && + while (!active_.empty()) { + IntervalPtr &IP = active_.back(); + unsigned reg = IP.first->reg; + DOUT << "\tinterval " << *IP.first << " expired\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); prt_->delRegUse(reg); - i = IntervalPtrs::reverse_iterator(active_.erase(i.base()-1)); + active_.pop_back(); } // expire any remaining inactive intervals - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ) { - DEBUG(std::cerr << "\tinterval " << *i->first << " expired\n"); - i = IntervalPtrs::reverse_iterator(inactive_.erase(i.base()-1)); + DEBUG(for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + DOUT << "\tinterval " << *i->first << " expired\n"); + inactive_.clear(); + + // Add live-ins to every BB except for entry. Also perform trivial coalescing. 
+ MachineFunction::iterator EntryMBB = mf_->begin(); + SmallVector LiveInMBBs; + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { + LiveInterval &cur = *i->second; + unsigned Reg = 0; + bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); + if (isPhys) + Reg = cur.reg; + else if (vrm_->isAssignedReg(cur.reg)) + Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); + if (!Reg) + continue; + // Ignore splited live intervals. + if (!isPhys && vrm_->getPreSplitReg(cur.reg)) + continue; + for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); + I != E; ++I) { + const LiveRange &LR = *I; + if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { + for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) + if (LiveInMBBs[i] != EntryMBB) + LiveInMBBs[i]->addLiveIn(Reg); + LiveInMBBs.clear(); + } + } } - DEBUG(std::cerr << *vrm_); + DOUT << *vrm_; } /// processActiveIntervals - expire old intervals and move non-overlapping ones /// to the inactive list. -void RA::processActiveIntervals(unsigned CurPoint) +void RALinScan::processActiveIntervals(unsigned CurPoint) { - DEBUG(std::cerr << "\tprocessing active intervals:\n"); + DOUT << "\tprocessing active intervals:\n"; for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -313,8 +438,8 @@ void RA::processActiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. 
- DEBUG(std::cerr << "\t\tinterval " << *Interval << " expired\n"); - assert(MRegisterInfo::isVirtualRegister(reg) && + DOUT << "\t\tinterval " << *Interval << " expired\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); prt_->delRegUse(reg); @@ -326,8 +451,8 @@ void RA::processActiveIntervals(unsigned CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DEBUG(std::cerr << "\t\tinterval " << *Interval << " inactive\n"); - assert(MRegisterInfo::isVirtualRegister(reg) && + DOUT << "\t\tinterval " << *Interval << " inactive\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); prt_->delRegUse(reg); @@ -347,9 +472,9 @@ void RA::processActiveIntervals(unsigned CurPoint) /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. -void RA::processInactiveIntervals(unsigned CurPoint) +void RALinScan::processInactiveIntervals(unsigned CurPoint) { - DEBUG(std::cerr << "\tprocessing inactive intervals:\n"); + DOUT << "\tprocessing inactive intervals:\n"; for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -359,7 +484,7 @@ void RA::processInactiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(std::cerr << "\t\tinterval " << *Interval << " expired\n"); + DOUT << "\t\tinterval " << *Interval << " expired\n"; // Pop off the end of the list. 
inactive_[i] = inactive_.back(); @@ -367,8 +492,8 @@ void RA::processInactiveIntervals(unsigned CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DEBUG(std::cerr << "\t\tinterval " << *Interval << " active\n"); - assert(MRegisterInfo::isVirtualRegister(reg) && + DOUT << "\t\tinterval " << *Interval << " active\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); prt_->addRegUse(reg); @@ -390,22 +515,24 @@ void RA::processInactiveIntervals(unsigned CurPoint) /// register and its weight. static void updateSpillWeights(std::vector &Weights, unsigned reg, float weight, - const MRegisterInfo *MRI) { + const TargetRegisterInfo *TRI) { Weights[reg] += weight; - for (const unsigned* as = MRI->getAliasSet(reg); *as; ++as) + for (const unsigned* as = TRI->getAliasSet(reg); *as; ++as) Weights[*as] += weight; } -static RA::IntervalPtrs::iterator FindIntervalInVector(RA::IntervalPtrs &IP, - LiveInterval *LI) { - for (RA::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); I != E; ++I) +static +RALinScan::IntervalPtrs::iterator +FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { + for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); + I != E; ++I) if (I->first == LI) return I; return IP.end(); } -static void RevertVectorIteratorsTo(RA::IntervalPtrs &V, unsigned Point) { +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { - RA::IntervalPtr &IP = V[i]; + RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), IP.second, Point); if (I != IP.first->begin()) --I; @@ -413,27 +540,184 @@ static void RevertVectorIteratorsTo(RA::IntervalPtrs &V, unsigned Point) { } } +/// addStackInterval - Create a LiveInterval for stack if the specified live +/// interval has been spilled. 
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, + LiveIntervals *li_, float &Weight, + VirtRegMap &vrm_) { + int SS = vrm_.getStackSlot(cur->reg); + if (SS == VirtRegMap::NO_STACK_SLOT) + return; + LiveInterval &SI = ls_->getOrCreateInterval(SS); + SI.weight += Weight; + + VNInfo *VNI; + if (SI.hasAtLeastOneValue()) + VNI = SI.getValNumInfo(0); + else + VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator()); + + LiveInterval &RI = li_->getInterval(cur->reg); + // FIXME: This may be overly conservative. + SI.MergeRangesInAsValue(RI, VNI); +} + +/// getConflictWeight - Return the number of conflicts between cur +/// live interval and defs and uses of Reg weighted by loop depthes. +static float getConflictWeight(LiveInterval *cur, unsigned Reg, + LiveIntervals *li_, + MachineRegisterInfo *mri_, + const MachineLoopInfo *loopInfo) { + float Conflicts = 0; + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), + E = mri_->reg_end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (cur->liveAt(li_->getInstructionIndex(MI))) { + unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); + Conflicts += powf(10.0f, (float)loopDepth); + } + } + return Conflicts; +} + +/// findIntervalsToSpill - Determine the intervals to spill for the +/// specified interval. It's passed the physical registers whose spill +/// weight is the lowest among all the registers whose live intervals +/// conflict with the interval. +void RALinScan::findIntervalsToSpill(LiveInterval *cur, + std::vector > &Candidates, + unsigned NumCands, + SmallVector &SpillIntervals) { + // We have figured out the *best* register to spill. But there are other + // registers that are pretty good as well (spill weight within 3%). Spill + // the one that has fewest defs and uses that conflict with cur. 
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; + SmallVector SLIs[3]; + + DOUT << "\tConsidering " << NumCands << " candidates: "; + DEBUG(for (unsigned i = 0; i != NumCands; ++i) + DOUT << tri_->getName(Candidates[i].first) << " "; + DOUT << "\n";); + + // Calculate the number of conflicts of each candidate. + for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second-1)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + // Which is the best candidate? 
+ unsigned BestCandidate = 0; + float MinConflicts = Conflicts[0]; + for (unsigned i = 1; i != NumCands; ++i) { + if (Conflicts[i] < MinConflicts) { + BestCandidate = i; + MinConflicts = Conflicts[i]; + } + } + + std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), + std::back_inserter(SpillIntervals)); +} + +namespace { + struct WeightCompare { + typedef std::pair RegWeightPair; + bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { + return LHS.second < RHS.second; + } + }; +} + +static bool weightsAreClose(float w1, float w2) { + if (!NewHeuristic) + return false; + + float diff = w1 - w2; + if (diff <= 0.02f) // Within 0.02f + return true; + return (diff / w2) <= 0.05f; // Within 5%. +} + /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. -void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - DEBUG(std::cerr << "\tallocating current interval: "); + DOUT << "\tallocating current interval: "; + + // This is an implicitly defined live interval, just assign any register. + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + if (cur->empty()) { + unsigned physReg = cur->preference; + if (!physReg) + physReg = *RC->allocation_order_begin(*mf_); + DOUT << tri_->getName(physReg) << '\n'; + // Note the register is not really in use. + vrm_->assignVirt2Phys(cur->reg, physReg); + return; + } PhysRegTracker backupPrt = *prt_; std::vector > SpillWeightsToAdd; unsigned StartPosition = cur->beginNumber(); - const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - + + // If start of this live interval is defined by a move instruction and its + // source is assigned a physical register that is compatible with the target + // register class, then we should try to assign it the same register. 
+ // This can happen when the move is from a larger register class to a smaller + // one, e.g. X86::mov32to32_. These move instructions are not coalescable. + if (!cur->preference && cur->hasAtLeastOneValue()) { + VNInfo *vni = cur->begin()->valno; + if (vni->def && vni->def != ~1U && vni->def != ~0U) { + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (CopyMI && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + cur->preference = Reg; + } + } + } + // for every interval in inactive we overlap with, mark the // register as not free and update spill weights. for (IntervalPtrs::const_iterator i = inactive_.begin(), e = inactive_.end(); i != e; ++i) { unsigned Reg = i->first->reg; - assert(MRegisterInfo::isVirtualRegister(Reg) && + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(Reg); + const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); // If this is not in a related reg class to the register we're allocating, // don't check it. if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && @@ -450,21 +734,21 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) // is very bad (it contains all callee clobbered registers for any functions // with a call), so we want to avoid doing that if possible. unsigned physReg = getFreePhysReg(cur); + unsigned BestPhysReg = physReg; if (physReg) { // We got a register. However, if it's in the fixed_ list, we might // conflict with it. Check to see if we conflict with it or any of its // aliases. 
- std::set RegAliases; - for (const unsigned *AS = mri_->getAliasSet(physReg); *AS; ++AS) + SmallSet RegAliases; + for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) RegAliases.insert(*AS); bool ConflictsWithFixed = false; for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - if (physReg == fixed_[i].first->reg || - RegAliases.count(fixed_[i].first->reg)) { + IntervalPtr &IP = fixed_[i]; + if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { // Okay, this reg is on the fixed list. Check to see if we actually // conflict. - IntervalPtr &IP = fixed_[i]; LiveInterval *I = IP.first; if (I->endNumber() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); @@ -518,76 +802,100 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) // the free physical register and add this interval to the active // list. if (physReg) { - DEBUG(std::cerr << mri_->getName(physReg) << '\n'); + DOUT << tri_->getName(physReg) << '\n'; vrm_->assignVirt2Phys(cur->reg, physReg); prt_->addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); return; } - DEBUG(std::cerr << "no free registers\n"); + DOUT << "no free registers\n"; // Compile the spill weights into an array that is better for scanning. - std::vector SpillWeights(mri_->getNumRegs(), 0.0); + std::vector SpillWeights(tri_->getNumRegs(), 0.0f); for (std::vector >::iterator I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, mri_); + updateSpillWeights(SpillWeights, I->first, I->second, tri_); // for each interval in active, update spill weights. 
for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); i != e; ++i) { unsigned reg = i->first->reg; - assert(MRegisterInfo::isVirtualRegister(reg) && + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, mri_); + updateSpillWeights(SpillWeights, reg, i->first->weight, tri_); } - DEBUG(std::cerr << "\tassigning stack slot at interval "<< *cur << ":\n"); + DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; // Find a register to spill. - float minWeight = float(HUGE_VAL); - unsigned minReg = 0; - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; - if (minWeight > SpillWeights[reg]) { - minWeight = SpillWeights[reg]; - minReg = reg; + float minWeight = HUGE_VALF; + unsigned minReg = 0; /*cur->preference*/; // Try the preferred register first. + + bool Found = false; + std::vector > RegsWeights; + if (!minReg || SpillWeights[minReg] == HUGE_VALF) + for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), + e = RC->allocation_order_end(*mf_); i != e; ++i) { + unsigned reg = *i; + float regWeight = SpillWeights[reg]; + if (minWeight > regWeight) + Found = true; + RegsWeights.push_back(std::make_pair(reg, regWeight)); } - } // If we didn't find a register that is spillable, try aliases? - if (!minReg) { + if (!Found) { for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. 
- for (const unsigned* as = mri_->getAliasSet(reg); *as; ++as) { - if (minWeight > SpillWeights[*as]) { - minWeight = SpillWeights[*as]; - minReg = *as; - } - } + for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) + RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); } + } + // Sort all potential spill candidates by weight. + std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare()); + minReg = RegsWeights[0].first; + minWeight = RegsWeights[0].second; + if (minWeight == HUGE_VALF) { // All registers must have inf weight. Just grab one! - if (!minReg) - minReg = *RC->allocation_order_begin(*mf_); + minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_); + if (cur->weight == HUGE_VALF || + li_->getApproximateInstructionCount(*cur) == 0) { + // Spill a physical register around defs and uses. + li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_); + assignRegOrStackSlotAtInterval(cur); + return; + } } - - DEBUG(std::cerr << "\t\tregister with min weight: " - << mri_->getName(minReg) << " (" << minWeight << ")\n"); + + // Find up to 3 registers to consider as spill candidates. + unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; + while (LastCandidate > 1) { + if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) + break; + --LastCandidate; + } + + DOUT << "\t\tregister(s) with min weight(s): "; + DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) + DOUT << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"); // if the current has the minimum weight, we need to spill it and // add any added intervals back to unhandled, and restart // linearscan. 
- if (cur->weight != float(HUGE_VAL) && cur->weight <= minWeight) { - DEBUG(std::cerr << "\t\t\tspilling(c): " << *cur << '\n';); - int slot = vrm_->assignVirt2StackSlot(cur->reg); + if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { + DOUT << "\t\t\tspilling(c): " << *cur << '\n'; + float SSWeight; + SmallVector spillIs; std::vector added = - li_->addIntervalsForSpills(*cur, *vrm_, slot); + li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_, SSWeight); + addStackInterval(cur, ls_, li_, SSWeight, *vrm_); if (added.empty()) return; // Early exit if all spills were folded. @@ -606,61 +914,44 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) // should go back right in the front of the list unhandled_.push(cur); - // otherwise we spill all intervals aliasing the register with + assert(TargetRegisterInfo::isPhysicalRegister(minReg) && + "did not choose a register to spill?"); + + // We spill all intervals aliasing the register with // minimum weight, rollback to the interval with the earliest // start point and let the linear scan algorithm run again - std::vector added; - assert(MRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - std::vector toSpill(mri_->getNumRegs(), false); + SmallVector spillIs; - // We are going to spill minReg and all its aliases. - toSpill[minReg] = true; - for (const unsigned* as = mri_->getAliasSet(minReg); *as; ++as) - toSpill[*as] = true; + // Determine which intervals have to be spilled. 
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - // the earliest start of a spilled interval indicates up to where + // Set of spilled vregs (used later to rollback properly) + SmallSet spilled; + + // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back unsigned earliestStart = cur->beginNumber(); - // set of spilled vregs (used later to rollback properly) - std::set spilled; - - // spill live intervals of virtual regs mapped to the physical register we + // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep // track of the earliest start of all spilled live intervals since this will // mark our rollback point. - for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned reg = i->first->reg; - if (//MRegisterInfo::isVirtualRegister(reg) && - toSpill[vrm_->getPhys(reg)] && - cur->overlapsFrom(*i->first, i->second)) { - DEBUG(std::cerr << "\t\t\tspilling(a): " << *i->first << '\n'); - earliestStart = std::min(earliestStart, i->first->beginNumber()); - int slot = vrm_->assignVirt2StackSlot(i->first->reg); - std::vector newIs = - li_->addIntervalsForSpills(*i->first, *vrm_, slot); - std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); - spilled.insert(reg); - } - } - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned reg = i->first->reg; - if (//MRegisterInfo::isVirtualRegister(reg) && - toSpill[vrm_->getPhys(reg)] && - cur->overlapsFrom(*i->first, i->second-1)) { - DEBUG(std::cerr << "\t\t\tspilling(i): " << *i->first << '\n'); - earliestStart = std::min(earliestStart, i->first->beginNumber()); - int slot = vrm_->assignVirt2StackSlot(reg); - std::vector newIs = - li_->addIntervalsForSpills(*i->first, *vrm_, slot); - std::copy(newIs.begin(), newIs.end(), 
std::back_inserter(added)); - spilled.insert(reg); - } + std::vector added; + while (!spillIs.empty()) { + LiveInterval *sli = spillIs.back(); + spillIs.pop_back(); + DOUT << "\t\t\tspilling(a): " << *sli << '\n'; + earliestStart = std::min(earliestStart, sli->beginNumber()); + float SSWeight; + std::vector newIs = + li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_, SSWeight); + addStackInterval(sli, ls_, li_, SSWeight, *vrm_); + std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); + spilled.insert(sli->reg); } - DEBUG(std::cerr << "\t\trolling back to: " << earliestStart << '\n'); + DOUT << "\t\trolling back to: " << earliestStart << '\n'; // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -670,7 +961,7 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) // If this interval starts before t we are done. if (i->beginNumber() < earliestStart) break; - DEBUG(std::cerr << "\t\t\tundo changes for: " << *i << '\n'); + DOUT << "\t\t\tundo changes for: " << *i << '\n'; handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -678,23 +969,28 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) IntervalPtrs::iterator it; if ((it = FindIntervalInVector(active_, i)) != active_.end()) { active_.erase(it); - assert(!MRegisterInfo::isPhysicalRegister(i->reg)); + assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); if (!spilled.count(i->reg)) unhandled_.push(i); prt_->delRegUse(vrm_->getPhys(i->reg)); vrm_->clearVirt(i->reg); } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { inactive_.erase(it); - assert(!MRegisterInfo::isPhysicalRegister(i->reg)); + assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); if (!spilled.count(i->reg)) unhandled_.push(i); vrm_->clearVirt(i->reg); } else { - assert(MRegisterInfo::isVirtualRegister(i->reg) && + 
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) && "Can only allocate virtual registers!"); vrm_->clearVirt(i->reg); unhandled_.push(i); } + + // If the interval has a preference, it must be defined by a copy. Clear the + // preference now since the source interval allocation may have been undone + // as well. + i->preference = 0; } // Rewind the iterators in the active, inactive, and fixed lists back to the @@ -710,9 +1006,9 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && HI->expiredAt(cur->beginNumber())) { - DEBUG(std::cerr << "\t\t\tundo changes for: " << *HI << '\n'); + DOUT << "\t\t\tundo changes for: " << *HI << '\n'; active_.push_back(std::make_pair(HI, HI->begin())); - assert(!MRegisterInfo::isPhysicalRegister(HI->reg)); + assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); prt_->addRegUse(vrm_->getPhys(HI->reg)); } } @@ -724,44 +1020,61 @@ void RA::assignRegOrStackSlotAtInterval(LiveInterval* cur) /// getFreePhysReg - return a free physical register for this virtual register /// interval if we have one, otherwise return 0. -unsigned RA::getFreePhysReg(LiveInterval *cur) { - std::vector inactiveCounts(mri_->getNumRegs(), 0); +unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { + SmallVector inactiveCounts; unsigned MaxInactiveCount = 0; - const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg); + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); i != e; ++i) { unsigned reg = i->first->reg; - assert(MRegisterInfo::isVirtualRegister(reg) && + assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); // If this is not in a related reg class to the register we're allocating, // don't check it.
- const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(reg); + const TargetRegisterClass *RegRC = mri_->getRegClass(reg); if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { reg = vrm_->getPhys(reg); + if (inactiveCounts.size() <= reg) + inactiveCounts.resize(reg+1); ++inactiveCounts[reg]; MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); } } - const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(cur->reg); - unsigned FreeReg = 0; unsigned FreeRegInactiveCount = 0; - + + // If copy coalescer has assigned a "preferred" register, check if it's + // available first. + if (cur->preference) { + if (prt_->isRegAvail(cur->preference) && + RC->contains(cur->preference)) { + DOUT << "\t\tassigned the preferred register: " + << tri_->getName(cur->preference) << "\n"; + return cur->preference; + } else + DOUT << "\t\tunable to assign the preferred register: " + << tri_->getName(cur->preference) << "\n"; + } + // Scan for the first available register. - TargetRegisterClass::iterator I = rc->allocation_order_begin(*mf_); - TargetRegisterClass::iterator E = rc->allocation_order_end(*mf_); + TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_); + TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_); + assert(I != E && "No allocatable register in this register class!"); for (; I != E; ++I) if (prt_->isRegAvail(*I)) { FreeReg = *I; - FreeRegInactiveCount = inactiveCounts[FreeReg]; + if (FreeReg < inactiveCounts.size()) + FreeRegInactiveCount = inactiveCounts[FreeReg]; + else + FreeRegInactiveCount = 0; break; } - + // If there are no free regs, or if this reg has the max inactive count, // return this register. if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg; @@ -772,7 +1085,8 @@ unsigned RA::getFreePhysReg(LiveInterval *cur) { // reevaluated now. 
for (; I != E; ++I) { unsigned Reg = *I; - if (prt_->isRegAvail(Reg) && FreeRegInactiveCount < inactiveCounts[Reg]) { + if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() && + FreeRegInactiveCount < inactiveCounts[Reg]) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -784,5 +1098,5 @@ unsigned RA::getFreePhysReg(LiveInterval *cur) { } FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RA(); + return new RALinScan(); }