X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=de528a7373561de60918124671e48a471572f977;hb=4ba844388c586ee40871a52dc9d6eab883fde1b7;hp=ca9573138c8317e1ee818bebf6b21e4282584616;hpb=523823b89724cb574c4e899556ae22b239b3daa5;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index ca9573138c8..de528a73735 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -35,8 +35,11 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -59,6 +62,28 @@ SplitSpillMode("split-spill-mode", cl::Hidden, clEnumValEnd), cl::init(SplitEditor::SM_Partition)); +static cl::opt +LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, + cl::desc("Last chance recoloring max depth"), + cl::init(5)); + +static cl::opt LastChanceRecoloringMaxInterference( + "lcr-max-interf", cl::Hidden, + cl::desc("Last chance recoloring maximum number of considered" + " interference at a time"), + cl::init(8)); + +static cl::opt +ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring")); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +CSRFirstTimeCost("regalloc-csr-first-time-cost", + cl::desc("Cost for first time use of callee-saved register."), + cl::init(0), cl::Hidden); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -66,10 +91,19 @@ namespace { class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { + // Convenient shortcuts. + typedef std::priority_queue > PQueue; + typedef SmallPtrSet SmallLISet; + typedef SmallSet SmallVirtRegSet; // context MachineFunction *MF; + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + // analyses SlotIndexes *Indexes; MachineBlockFrequencyInfo *MBFI; @@ -80,8 +114,8 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - OwningPtr SpillerInstance; - std::priority_queue > Queue; + std::unique_ptr SpillerInstance; + PQueue Queue; unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. @@ -120,6 +154,22 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + #ifndef NDEBUG static const char *const StageName[]; #endif @@ -169,15 +219,14 @@ class RAGreedy : public MachineFunctionPass, void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } bool operator<(const EvictionCost &O) const { - if (BrokenHints != O.BrokenHints) - return BrokenHints < O.BrokenHints; - return MaxWeight < O.MaxWeight; + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); } }; // splitting state. - OwningPtr SA; - OwningPtr SE; + std::unique_ptr SA; + std::unique_ptr SE; /// Cached per-block interference maps InterferenceCache IntfCache; @@ -226,38 +275,45 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector GlobalCand; - enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; + enum : unsigned { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. SmallVector BundleCand; + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + public: RAGreedy(); /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - virtual void enqueue(LiveInterval *LI); - virtual LiveInterval *dequeue(); - virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl&); + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueue(LiveInterval *LI) override; + LiveInterval *dequeue() override; + unsigned selectOrSplit(LiveInterval&, SmallVectorImpl&) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; static char ID; private: - bool LRE_CanEraseVirtReg(unsigned); - void LRE_WillShrinkVirtReg(unsigned); - void LRE_DidCloneVirtReg(unsigned, unsigned); + unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); @@ -272,6 +328,9 @@ private: bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl&); + bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl&); @@ -279,6 +338,21 @@ private: SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. + unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + unsigned PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl &NewVRegs); + void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -287,6 +361,11 @@ private: SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned); }; } // end anonymous namespace @@ -305,7 +384,7 @@ const char *const RAGreedy::StageName[] = { // Hysteresis to use when comparing floats. // This helps stabilize decisions based on float comparisons. -const float Hysteresis = 0.98f; +const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 FunctionPass* llvm::createGreedyRegisterAllocator() { @@ -395,12 +474,14 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(nullptr); ExtraRegInfo.clear(); GlobalCand.clear(); } -void RAGreedy::enqueue(LiveInterval *LI) { +void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } + +void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); @@ -418,12 +499,18 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + // Giant live ranges fall back to the global assignment heuristic, which + // prevents excessive spilling in pathological cases. + bool ReverseLocal = TRI->reverseLocalAssignment(); + bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && + (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + + if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. - if (!TRI->reverseLocalAssignment()) + if (!ReverseLocal) Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); else { // Allocating bottom up may allow many short LRGs to be assigned first @@ -447,14 +534,16 @@ void RAGreedy::enqueue(LiveInterval *LI) { } // The virtual register number is a tie breaker for same-sized ranges. // Give lower vreg numbers higher priority to assign them first. - Queue.push(std::make_pair(Prio, ~Reg)); + CurQueue.push(std::make_pair(Prio, ~Reg)); } -LiveInterval *RAGreedy::dequeue() { - if (Queue.empty()) - return 0; - LiveInterval *LI = &LIS->getInterval(~Queue.top().second); - Queue.pop(); +LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } + +LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { + if (CurQueue.empty()) + return nullptr; + LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); + CurQueue.pop(); return LI; } @@ -563,7 +652,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, } /// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. When OnlyCheap is set, don't do anything +/// PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -1189,9 +1278,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { unsigned NumCands = 0; - unsigned BestCand = NoCand; BlockFrequency BestCost; - SmallVector UsedCands; // Check if we can split this live range around a compact region. bool HasCompact = calcCompactRegion(GlobalCand.front()); @@ -1207,8 +1294,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, + false/*IgnoreCSR*/); + + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); +} + +unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) + continue; + // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). if (NumCands == IntfCache.getMaxCursors()) { @@ -1275,11 +1383,13 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } ++NumCands; } + return BestCand; +} - // No solutions found, fall back to single block splitting. - if (!HasCompact && BestCand == NoCand) - return 0; - +unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl &NewVRegs) { + SmallVector UsedCands; // Prepare split editor. LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); @@ -1368,6 +1478,22 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Per-Instruction Splitting //===----------------------------------------------------------------------===// +/// Get the number of allocatable registers that match the constraints of \p Reg +/// on \p MI and that are also in \p SuperRC. +static unsigned getNumAllocatableRegsForConstraints( + const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + const RegisterClassInfo &RCI) { + assert(SuperRC && "Invalid register class"); + + const TargetRegisterClass *ConstrainedRC = + MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, + /* ExploreBundle */ true); + if (!ConstrainedRC) + return 0; + return RCI.getNumAllocatableRegs(ConstrainedRC); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. However, when the live range is in a constrained register @@ -1378,8 +1504,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + if (!RegClassInfo.isProperSubClass(CurRC)) return 0; // Always enable split spill mode, since we're effectively spilling to a @@ -1393,10 +1520,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - // Split around every non-copy instruction. + const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + // Split around every non-copy instruction if this split will relax + // the constraints on the virtual register. + // Otherwise, splitting just inserts uncoalescable copies that do not help + // the allocation. for (unsigned i = 0; i != Uses.size(); ++i) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) - if (MI->isFullCopy()) { + if (MI->isFullCopy() || + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, + TRI, RCI)) { DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } @@ -1779,6 +1914,222 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return tryBlockSplit(VirtReg, Order, NewVRegs); } +//===----------------------------------------------------------------------===// +// Last Chance Recoloring +//===----------------------------------------------------------------------===// + +/// mayRecolorAllInterferences - Check if the virtual registers that +/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be +/// recolored to free \p PhysReg. +/// When true is returned, \p RecoloringCandidates has been augmented with all +/// the live intervals that need to be recolored in order to free \p PhysReg +/// for \p VirtReg. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +bool +RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there is LastChanceRecoloringMaxInterference or more interferences, + // chances are one would not be recolorable. + if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= + LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { + DEBUG(dbgs() << "Early abort: too many interferences.\n"); + CutOffInfo |= CO_Interf; + return false; + } + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + // If Intf is done and sit on the same register class as VirtReg, + // it would not be recolorable as it is in the same state as VirtReg. + if ((getStage(*Intf) == RS_Done && + MRI->getRegClass(Intf->reg) == CurRC) || + FixedRegisters.count(Intf->reg)) { + DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); + return false; + } + RecoloringCandidates.insert(Intf); + } + } + return true; +} + +/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring +/// its interferences. +/// Last chance recoloring chooses a color for \p VirtReg and recolors every +/// virtual register that was using it. The recoloring process may recursively +/// use the last chance recoloring. Therefore, when a virtual register has been +/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot +/// be last-chance-recolored again during this recoloring "session". +/// E.g., +/// Let +/// vA can use {R1, R2 } +/// vB can use { R2, R3} +/// vC can use {R1 } +/// Where vA, vB, and vC cannot be split anymore (they are reloads for +/// instance) and they all interfere. +/// +/// vA is assigned R1 +/// vB is assigned R2 +/// vC tries to evict vA but vA is already done. +/// Regular register allocation fails. +/// +/// Last chance recoloring kicks in: +/// vC does as if vA was evicted => vC uses R1. +/// vC is marked as fixed. +/// vA needs to find a color. +/// None are available. +/// vA cannot evict vC: vC is a fixed virtual register now. +/// vA does as if vB was evicted => vA uses R2. +/// vB needs to find a color. +/// R3 is available. +/// Recoloring => vC = R1, vA = R2, vB = R3 +/// +/// \p Order defines the preferred allocation order for \p VirtReg. +/// \p NewRegs will contain any new virtual register that have been created +/// (split, spill) during the process and that must be assigned. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +/// \p Depth gives the current depth of the last chance recoloring. +/// \return a physical register that can be used for VirtReg or ~0u if none +/// exists. +unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + // Ranges must be Done. + assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && + "Last chance recoloring should really be last chance"); + // Set the max depth to LastChanceRecoloringMaxDepth. + // We may want to reconsider that if we end up with a too large search space + // for target with hundreds of registers. + // Indeed, in that case we may want to cut the search space earlier. + if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { + DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + CutOffInfo |= CO_Depth; + return ~0u; + } + + // Set of Live intervals that will need to be recolored. + SmallLISet RecoloringCandidates; + // Record the original mapping virtual register to physical register in case + // the recoloring fails. + DenseMap VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in + // this recoloring "session". + FixedRegisters.insert(VirtReg.reg); + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + RecoloringCandidates.clear(); + VirtRegToPhysReg.clear(); + + // It is only possible to recolor virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > + LiveRegMatrix::IK_VirtReg) { + DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); + + continue; + } + + // Early give up on this PhysReg if it is obvious we cannot recolor all + // the interferences. + if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, + FixedRegisters)) { + DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); + continue; + } + + // RecoloringCandidates contains all the virtual registers that interfer + // with VirtReg on PhysReg (or one of its aliases). + // Enqueue them for recoloring and perform the actual recoloring. + PQueue RecoloringQueue; + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + enqueue(RecoloringQueue, *It); + assert(VRM->hasPhys(ItVirtReg) && + "Interferences are supposed to be with allocated vairables"); + + // Record the current allocation. + VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + // unset the related struct. + Matrix->unassign(**It); + } + + // Do as if VirtReg was assigned to PhysReg so that the underlying + // recoloring has the right information about the interferes and + // available colors. + Matrix->assign(VirtReg, PhysReg); + + // Save the current recoloring state. + // If we cannot recolor all the interferences, we will have to start again + // at this point for the next physical register. + SmallVirtRegSet SaveFixedRegisters(FixedRegisters); + if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, + Depth)) { + // Do not mess up with the global assignment process. + // I.e., VirtReg must be unassigned. + Matrix->unassign(VirtReg); + return PhysReg; + } + + DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + + // The recoloring attempt failed, undo the changes. + FixedRegisters = SaveFixedRegisters; + Matrix->unassign(VirtReg); + + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + if (VRM->hasPhys(ItVirtReg)) + Matrix->unassign(**It); + Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + } + } + + // Last chance recoloring did not worked either, give up. + return ~0u; +} + +/// tryRecoloringCandidates - Try to assign a new color to every register +/// in \RecoloringQueue. +/// \p NewRegs will contain any new virtual register created during the +/// recoloring process. +/// \p FixedRegisters[in/out] contains all the registers that have been +/// recolored. +/// \return true if all virtual registers in RecoloringQueue were successfully +/// recolored, false otherwise. +bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, + SmallVectorImpl &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + while (!RecoloringQueue.empty()) { + LiveInterval *LI = dequeue(RecoloringQueue); + DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); + unsigned PhysReg; + PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + if (PhysReg == ~0u || !PhysReg) + return false; + DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); + FixedRegisters.insert(LI->reg); + } + return true; +} //===----------------------------------------------------------------------===// // Main Entry Point @@ -1786,10 +2137,122 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { + CutOffInfo = CO_None; + LLVMContext &Ctx = MF->getFunction()->getContext(); + SmallVirtRegSet FixedRegisters; + unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + if (Reg == ~0U && (CutOffInfo != CO_None)) { + uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); + if (CutOffEncountered == CO_Depth) + Ctx.emitError("register allocation failed: maximum depth for recoloring " + "reached. Use -fexhaustive-register-search to skip " + "cutoffs"); + else if (CutOffEncountered == CO_Interf) + Ctx.emitError("register allocation failed: maximum interference for " + "recoloring reached. Use -fexhaustive-register-search " + "to skip cutoffs"); + else if (CutOffEncountered == (CO_Depth | CO_Interf)) + Ctx.emitError("register allocation failed: maximum interference and " + "depth for recoloring reached. Use " + "-fexhaustive-register-search to skip cutoffs"); + } + return Reg; +} + +/// Using a CSR for the first time has a cost because it causes push|pop +/// to be added to prologue|epilogue. Splitting a cold section of the live +/// range can have lower cost than using the CSR for the first time; +/// Spilling a live range in the cold path can have lower cost than using +/// the CSR for the first time. Returns the physical register if we decide +/// to use the CSR; otherwise return 0. +unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, + unsigned PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl &NewVRegs) { + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + // We choose spill over using the CSR for the first time if the spill cost + // is lower than CSRCost. + SA->analyze(&VirtReg); + if (calcSpillCost() >= CSRCost) + return PhysReg; + + // We are going to spill, set CostPerUseLimit to 1 to make sure that + // we will not use a callee-saved register in tryEvict. + CostPerUseLimit = 1; + return 0; + } + if (getStage(VirtReg) < RS_Split) { + // We choose pre-splitting over using the CSR for the first time if + // the cost of splitting is lower than CSRCost. + SA->analyze(&VirtReg); + unsigned NumCands = 0; + BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, true /*IgnoreCSR*/); + if (BestCand == NoCand) + // Use the CSR if we can't find a region split below CSRCost. + return PhysReg; + + // Perform the actual pre-splitting. + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return 0; + } + return PhysReg; +} + +void RAGreedy::initializeCSRCost() { + // We use the larger one out of the command-line option and the value report + // by TRI. + CSRCost = BlockFrequency( + std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); + if (!CSRCost.getFrequency()) + return; + + // Raw cost is relative to Entry == 2^14; scale it appropriately. + uint64_t ActualEntry = MBFI->getEntryFreq(); + if (!ActualEntry) { + CSRCost = 0; + return; + } + uint64_t FixedEntry = 1 << 14; + if (ActualEntry < FixedEntry) + CSRCost *= BranchProbability(ActualEntry, FixedEntry); + else if (ActualEntry <= UINT32_MAX) + // Invert the fraction and divide. + CSRCost /= BranchProbability(FixedEntry, ActualEntry); + else + // Can't use BranchProbability in general, since it takes 32-bit numbers. + CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); +} + +unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + unsigned CostPerUseLimit = ~0u; // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) - return PhysReg; + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // We check other options if we are using a CSR for the first time. + bool CSRFirstUse = false; + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) + CSRFirstUse = true; + + // When NewVRegs is not empty, we may have made decisions such as evicting + // a virtual register, go with the earlier decisions and use the physical + // register. + if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); + if (CSRReg || !NewVRegs.empty()) + // Return now if we decide to use a CSR or create new vregs due to + // pre-splitting. + return CSRReg; + } else + return PhysReg; + } LiveRangeStage Stage = getStage(VirtReg); DEBUG(dbgs() << StageName[Stage] @@ -1799,7 +2262,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) return PhysReg; assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -1817,7 +2280,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) - return ~0u; + return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, + Depth); // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1843,6 +2307,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; + TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getTarget().getInstrInfo(); + RCI.runOnMachineFunction(mf); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -1858,6 +2325,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); + initializeCSRCost(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); DEBUG(LIS->dump());