X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=366c94e2f47d0432f6bb166a4b8f275ce66cc527;hb=feb468ab24a9e85b4d27faa6badfb57a2414610c;hp=7564c1da89a63b6f910b71c600d28cc4dc1cb554;hpb=9a54352879e5aaac2e2c37490e5cb7844550db8b;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 7564c1da89a..366c94e2f47 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -33,12 +33,11 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineLoopRanges.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -52,6 +51,15 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); +static cl::opt +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -62,20 +70,20 @@ class RAGreedy : public MachineFunctionPass, // context MachineFunction *MF; - BitVector ReservedRegs; // analyses SlotIndexes *Indexes; LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; - MachineLoopRanges *LoopRanges; 
EdgeBundles *Bundles; SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; // state std::auto_ptr SpillerInstance; std::priority_queue > Queue; + unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. // Some of the stages may also create new live ranges: @@ -91,31 +99,75 @@ class RAGreedy : public MachineFunctionPass, // range splitting algorithm terminates, something that is otherwise hard to // ensure. enum LiveRangeStage { - RS_New, ///< Never seen before. - RS_First, ///< First time in the queue. - RS_Second, ///< Second time in the queue. - RS_Region, ///< Produced by region splitting. - RS_Block, ///< Produced by per-block splitting. - RS_Local, ///< Produced by local splitting. - RS_Spill ///< Produced by spilling. + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. + RS_Spill, + + /// There is nothing more we can do to this live range. Abort compilation + /// if it can't be assigned. + RS_Done + }; + + static const char *const StageName[]; + + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage; + + // Cascade - Eviction loop prevention. See canEvictInterference(). 
+ unsigned Cascade; + + RegInfo() : Stage(RS_New), Cascade(0) {} }; - IndexedMap LRStage; + IndexedMap ExtraRegInfo; LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return LiveRangeStage(LRStage[VirtReg.reg]); + return ExtraRegInfo[VirtReg.reg].Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[VirtReg.reg].Stage = Stage; } template void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { unsigned Reg = (*Begin)->reg; - if (LRStage[Reg] == RS_New) - LRStage[Reg] = NewStage; + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = NewStage; } } + /// Cost of evicting interference. + struct EvictionCost { + unsigned BrokenHints; ///< Total number of broken hints. + float MaxWeight; ///< Maximum spill weight evicted. + + EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + + bool operator<(const EvictionCost &O) const { + if (BrokenHints != O.BrokenHints) + return BrokenHints < O.BrokenHints; + return MaxWeight < O.MaxWeight; + } + }; + // splitting state. std::auto_ptr SA; std::auto_ptr SE; @@ -123,13 +175,43 @@ class RAGreedy : public MachineFunctionPass, /// Cached per-block interference maps InterferenceCache IntfCache; - /// All basic blocks where the current register is live. + /// All basic blocks where the current register has uses. SmallVector SplitConstraints; /// Global live range splitting candidate info. struct GlobalSplitCandidate { + // Register intended for assignment, or 0. unsigned PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. + InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. 
BitVector LiveBundles; + SmallVector ActiveBlocks; + + void reset(InterferenceCache &Cache, unsigned Reg) { + PhysReg = Reg; + IntvIdx = 0; + Intf.setPhysReg(Cache, Reg); + LiveBundles.clear(); + ActiveBlocks.clear(); + } + + // Set B[i] = C for every live bundle where B[i] was NoCand. + unsigned getBundles(SmallVectorImpl &B, unsigned C) { + unsigned Count = 0; + for (int i = LiveBundles.find_first(); i >= 0; + i = LiveBundles.find_next(i)) + if (B[i] == NoCand) { + B[i] = C; + Count++; + } + return Count; + } }; /// Candidate info for for each PhysReg in AllocationOrder. @@ -137,9 +219,11 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector GlobalCand; - /// For every instruction in SA->UseSlots, store the previous non-copy - /// instruction. - SmallVector PrevSlot; + enum { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector BundleCand; public: RAGreedy(); @@ -164,25 +248,31 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); - bool addSplitConstraints(unsigned, float&); - float calcGlobalSplitCost(unsigned, const BitVector&); - void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, - SmallVectorImpl&); + float calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, float&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); + void growRegion(GlobalSplitCandidate &Cand); + float calcGlobalSplitCost(GlobalSplitCandidate&); + bool calcCompactRegion(GlobalSplitCandidate&); + void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); - SlotIndex getPrevMappedIndex(const MachineInstr*); - void calcPrevSlots(); - unsigned nextSplitPoint(unsigned); - bool canEvictInterference(LiveInterval&, unsigned, float&); + bool 
shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + void evictInterference(LiveInterval&, unsigned, + SmallVectorImpl&); + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, @@ -192,22 +282,37 @@ private: char RAGreedy::ID = 0; +#ifndef NDEBUG +const char *const RAGreedy::StageName[] = { + "RS_New", + "RS_Assign", + "RS_Split", + "RS_Split2", + "RS_Spill", + "RS_Done" +}; +#endif + +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons. +const float Hysteresis = 0.98f; + + FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - 
initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); @@ -224,7 +329,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitive(); + AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -232,8 +337,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -246,11 +349,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); @@ -273,16 +371,23 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // Cloning a register we haven't even heard about yet? Just ignore it. + if (!ExtraRegInfo.inBounds(Old)) + return; + // LRE may clone a virtual register because dead code elimination causes it to - // be split into connected components. Ensure that the new register gets the + // be split into connected components. The new components are much smaller + // than the original, so they should get a new chance at being assigned. // same stage as the parent. 
- LRStage.grow(New); - LRStage[New] = LRStage[Old]; + ExtraRegInfo[Old].Stage = RS_Assign; + ExtraRegInfo.grow(New); + ExtraRegInfo[New] = ExtraRegInfo[Old]; } void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - LRStage.clear(); + ExtraRegInfo.clear(); + GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -295,18 +400,17 @@ void RAGreedy::enqueue(LiveInterval *LI) { "Can only enqueue virtual registers"); unsigned Prio; - LRStage.grow(Reg); - if (LRStage[Reg] == RS_New) - LRStage[Reg] = RS_First; + ExtraRegInfo.grow(Reg); + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = RS_Assign; - if (LRStage[Reg] == RS_Second) + if (ExtraRegInfo[Reg].Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until - // everything else has been allocated. Long ranges are allocated last so - // they are split against realistic interference. - Prio = (1u << 31) - Size; - else { - // Everything else is allocated in long->short order. Long ranges that don't - // fit should be spilled ASAP so they don't create interference. + // everything else has been allocated. + Prio = Size; + } else { + // Everything is allocated in long->short order. Long ranges that don't fit + // should be spilled (or split) ASAP so they don't create interference. Prio = (1u << 31) + Size; // Boost ranges that have a physical register hint. @@ -325,63 +429,218 @@ LiveInterval *RAGreedy::dequeue() { return LI; } + +//===----------------------------------------------------------------------===// +// Direct Assignment +//===----------------------------------------------------------------------===// + +/// tryAssign - Try to assign VirtReg to an available register. 
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + Order.rewind(); + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!checkPhysRegInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint(PhysReg)) + return PhysReg; + + // PhysReg is available, but there may be a better choice. + + // If we missed a simple hint, try to cheaply evict interference from the + // preferred register. + if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Order.isHint(Hint)) { + DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + EvictionCost MaxCost(1); + if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + evictInterference(VirtReg, Hint, NewVRegs); + return Hint; + } + } + + // Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); + + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; +} + + //===----------------------------------------------------------------------===// // Interference eviction //===----------------------------------------------------------------------===// -/// canEvict - Return true if all interferences between VirtReg and PhysReg can -/// be evicted. Set maxWeight to the maximal spill weight of an interference. +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. +/// +/// @param A The live range to be assigned. 
+/// @param IsHint True when A is about to be assigned to its preferred +/// register. +/// @param B The live range to be evicted. +/// @param BreaksHint True when B is already assigned to its preferred register. +bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, bool BreaksHint) { + bool CanSplit = getStage(B) < RS_Spill; + + // Be fairly aggressive about following hints as long as the evictee can be + // split. + if (CanSplit && IsHint && !BreaksHint) + return true; + + return A.weight > B.weight; +} + +/// canEvictInterference - Return true if all interferences between VirtReg and +/// PhysReg can be evicted. When OnlyCheap is set, don't do anything +/// +/// @param VirtReg Live range that is about to be assigned. +/// @param PhysReg Desired register for assignment. +/// @prarm IsHint True when PhysReg is VirtReg's preferred register. +/// @param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - float &MaxWeight) { - float Weight = 0; + bool IsHint, EvictionCost &MaxCost) { + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never + // involved in an eviction before. If a cascade number was assigned, deny + // evicting anything with the same or a newer cascade number. This prevents + // infinite eviction loops. + // + // This works out so a register without a cascade number is allowed to evict + // anything, and it can be evicted by anything. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = NextCascade; + + EvictionCost Cost; for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - // If there is 10 or more interferences, chances are one is smaller. 
+ // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) return false; - // Check if any interfering live range is heavier than VirtReg. - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; - if (Intf->weight >= VirtReg.weight) + // Never evict spill products. They cannot split or spill. + if (getStage(*Intf) == RS_Done) + return false; + // Once a live range becomes small enough, it is urgent that we find a + // register for it. This is indicated by an infinite spill weight. These + // urgent live ranges get to evict almost anything. + bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable(); + // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; + if (Cascade <= IntfCascade) { + if (!Urgent) + return false; + // We permit breaking cascades for urgent evictions. It should be the + // last resort, though, so make it really expensive. + Cost.BrokenHints += 10; + } + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + // Finally, apply the eviction policy for non-urgent evictions. + if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; - Weight = std::max(Weight, Intf->weight); } } - MaxWeight = Weight; + MaxCost = Cost; return true; } +/// evictInterference - Evict any interferring registers that prevent VirtReg +/// from being assigned to Physreg. 
This assumes that canEvictInterference +/// returned true. +void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &NewVRegs) { + // Make sure that VirtReg has a cascade number, and assign that cascade + // number to every evicted register. These live ranges than then only be + // evicted by a newer cascade, preventing infinite loops. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; + + DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) + << " interference: Cascade " << Cascade << '\n'); + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + assert(Q.seenAllInterferences() && "Didn't check all interfererences."); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + unassign(*Intf, VRM->getPhys(Intf->reg)); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); + } + } +} + /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs){ + SmallVectorImpl &NewVRegs, + unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); - // Keep track of the lightest single interference seen so far. - float BestWeight = 0; + // Keep track of the cheapest interference seen so far. 
+ EvictionCost BestCost(~0u); unsigned BestPhys = 0; + // When we are just looking for a reduced cost per use, don't break any + // hints, and only evict smaller spill weights. + if (CostPerUseLimit < ~0u) { + BestCost.BrokenHints = 0; + BestCost.MaxWeight = VirtReg.weight; + } + Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; - if (!canEvictInterference(VirtReg, PhysReg, Weight)) + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; - - // This is an eviction candidate. - DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " - << Weight << '\n'); - if (BestPhys && Weight >= BestWeight) + // The first use of a callee-saved register in a function has cost 1. + // Don't start using a CSR when the CostPerUseLimit is low. + if (CostPerUseLimit == 1) + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(CSR, TRI) << '\n'); + continue; + } + + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; // Best so far. BestPhys = PhysReg; - BestWeight = Weight; + // Stop if the hint can be used. 
if (Order.isHint(PhysReg)) break; @@ -390,17 +649,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, if (!BestPhys) return 0; - DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n"); - for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - ++NumEvicted; - NewVRegs.push_back(Intf); - } - } + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -413,9 +662,9 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// interference pattern in Physreg and its aliases. Add the constraints to /// SpillPlacement and return the static cost of this split in Cost, assuming /// that all preferences in SplitConstraints are met. -/// If it is evident that no bundles will be live, abort early and return false. -bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { - InterferenceCache::Cursor Intf(IntfCache, PhysReg); +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { ArrayRef UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. @@ -429,6 +678,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? 
SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.ChangesValue = BI.FirstDef; if (!Intf.hasInterference()) continue; @@ -440,9 +690,9 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { if (BI.LiveIn) { if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) BC.Entry = SpillPlacement::MustSpill, ++Ins; - else if (Intf.first() < BI.FirstUse) + else if (Intf.first() < BI.FirstInstr) BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + else if (Intf.first() < BI.LastInstr) ++Ins; } @@ -450,9 +700,9 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { if (BI.LiveOut) { if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) BC.Exit = SpillPlacement::MustSpill, ++Ins; - else if (Intf.last() > BI.LastUse) + else if (Intf.last() > BI.LastInstr) BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + else if (Intf.last() > BI.FirstInstr) ++Ins; } @@ -460,69 +710,185 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { if (Ins) StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } + Cost = StaticCost; // Add constraints for use-blocks. Note that these are the only constraints // that may add a positive bias, it is downhill from here. SpillPlacer->addConstraints(SplitConstraints); - if (SpillPlacer->getPositiveNodes() == 0) - return false; + return SpillPlacer->scanActiveBundles(); +} - Cost = StaticCost; - // Now handle the live-through blocks without uses. These can only add - // negative bias, so we can abort whenever there are no more positive nodes. - // Compute constraints for a group of 8 blocks at a time. +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. 
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef Blocks) { const unsigned GroupSize = 8; SpillPlacement::BlockConstraint BCS[GroupSize]; - unsigned B = 0; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; - ArrayRef ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; - assert(B < GroupSize && "Array overflow"); - BCS[B].Number = Number; + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; Intf.moveToBlock(Number); - if (Intf.hasInterference()) { - // Interference for the live-in value. - if (Intf.first() <= Indexes->getMBBStartIdx(Number)) - BCS[B].Entry = SpillPlacement::MustSpill; - else - BCS[B].Entry = SpillPlacement::PrefSpill; - - // Interference for the live-out value. - if (Intf.last() >= SA->getLastSplitPoint(Number)) - BCS[B].Exit = SpillPlacement::MustSpill; - else - BCS[B].Exit = SpillPlacement::PrefSpill; - } else { - // No interference, transparent block. - BCS[B].Entry = BCS[B].Exit = SpillPlacement::DontCare; + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(makeArrayRef(TBS, T)); + T = 0; + } + continue; } + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + if (++B == GroupSize) { ArrayRef Array(BCS, B); SpillPlacer->addConstraints(Array); B = 0; - // Abort early when all hope is lost. 
- if (SpillPlacer->getPositiveNodes() == 0) - return false; } } ArrayRef Array(BCS, B); SpillPlacer->addConstraints(Array); - return SpillPlacer->getPositiveNodes() != 0; + SpillPlacer->addLinks(makeArrayRef(TBS, T)); +} + +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef NewBundles = SpillPlacer->getRecentPositive(); + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif + } + } + // Any new blocks to add? + if (ActiveBlocks.size() == AddedTo) + break; + + // Compute through constraints from the interference, or assume that all + // through blocks prefer spilling when forming compact regions. + ArrayRef NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + if (Cand.PhysReg) + addThroughConstraints(Cand.Intf, NewBlocks); + else + // Provide a strong negative bias on through blocks to prevent unwanted + // liveness on loop backedges. + SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); + AddedTo = ActiveBlocks.size(); + + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); + } + DEBUG(dbgs() << ", v=" << Visited); } +/// calcCompactRegion - Compute the set of edge bundles that should be live +/// when splitting the current live range into compact regions. 
Compact +/// regions can be computed without looking at interference. They are the +/// regions formed by removing all the live-through blocks from the live range. +/// +/// Returns false if the current live range is already compact, or if the +/// compact regions would form single block regions anyway. +bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { + // Without any through blocks, the live range is already compact. + if (!SA->getNumThroughBlocks()) + return false; + + // Compact regions don't correspond to any physreg. + Cand.reset(IntfCache, 0); + + DEBUG(dbgs() << "Compact region bundles"); + + // Use the spill placer to determine the live bundles. GrowRegion pretends + // that all the through blocks have interference when PhysReg is unset. + SpillPlacer->prepare(Cand.LiveBundles); + + // The static split cost will be zero since Cand.Intf reports no interference. + float Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + growRegion(Cand); + SpillPlacer->finish(); + + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + DEBUG({ + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + return true; +} + +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +float RAGreedy::calcSpillCost() { + float Cost = 0; + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. 
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef) + Cost += SpillPlacer->getBlockFrequency(Number); + } + return Cost; +} /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, - const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { float GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -539,19 +905,16 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - ArrayRef ThroughBlocks = SA->getThroughBlocks(); - SplitConstraints.resize(UseBlocks.size() + ThroughBlocks.size()); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; if (!RegIn && !RegOut) continue; if (RegIn && RegOut) { // We need double spill code if this block has interference. - Intf.moveToBlock(Number); - if (Intf.hasInterference()) + Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); continue; } @@ -561,229 +924,153 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, return GlobalCost; } -/// splitAroundRegion - Split VirtReg around the region determined by -/// LiveBundles. Make an effort to avoid interference from PhysReg. +/// splitAroundRegion - Split the current live range around the regions +/// determined by BundleCand and GlobalCand. 
/// -/// The 'register' interval is going to contain as many uses as possible while -/// avoiding interference. The 'stack' interval is the complement constructed by -/// SplitEditor. It will contain the rest. +/// Before calling this function, GlobalCand and BundleCand must be initialized +/// so each bundle is assigned to a valid candidate, or NoCand for the +/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor +/// objects must be initialized for the current live range, and intervals +/// created for the used candidates. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, - SmallVectorImpl &NewVRegs) { - DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) - << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) - dbgs() << " EB#" << i; - dbgs() << ".\n"; - }); - - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - - // Create the main cross-block interval. - SE->openIntv(); - - // First add all defs that are live out of a block. +/// @param LREdit The LiveRangeEdit object handling the current split. +/// @param UsedCands List of used GlobalCand entries. Every BundleCand value +/// must appear in this list. +void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, + ArrayRef UsedCands) { + // These are the intervals created for new global ranges. We may create more + // intervals for local ranges. + const unsigned NumGlobalIntvs = LREdit.size(); + DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); + assert(NumGlobalIntvs && "No global intervals configured"); + + // Isolate even single instructions when dealing with a proper sub-class. + // That guarantees register class inflation for the stack interval because it + // is all copies. 
+ unsigned Reg = SA->getParent().reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + + // First handle all the blocks with uses. ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; - - // Should the register be live out? - if (!BI.LiveOut || !RegOut) - continue; - - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" - << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); - - // The interference interval should either be invalid or overlap MBB. - assert((!Intf.hasInterference() || Intf.first() < Stop) - && "Bad interference"); - assert((!Intf.hasInterference() || Intf.last() > Start) - && "Bad interference"); - - // Check interference leaving the block. - if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); - continue; + unsigned Number = BI.MBB->getNumber(); + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + if (BI.LiveIn) { + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); } - if (!RegIn) { - // Block is live-through, but entry bundle is on the stack. - // Reload just before the first use. 
- DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; + } + if (BI.LiveOut) { + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); } - DEBUG(dbgs() << ", live-through.\n"); - continue; } - // Block has interference. - DEBUG(dbgs() << ", interference to " << Intf.last()); - - if (!BI.LiveThrough && Intf.last() <= BI.Def) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE->useIntv(BI.Def, Stop); + // Create separate intervals for isolated blocks with multiple uses. + if (!IntvIn && !IntvOut) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); continue; } - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - if (Intf.last().getBoundaryIndex() < BI.LastUse) { - // There are interference-free uses at the end of the block. - // Find the first use that can get the live-out register. - SmallVectorImpl::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.last().getBoundaryIndex()); - assert(UI != SA->UseSlots.end() && "Couldn't find last use"); - SlotIndex Use = *UI; - assert(Use <= BI.LastUse && "Couldn't find last use"); - // Only attempt a split befroe the last split point. - if (Use.getBaseIndex() <= LastSplitPoint) { - DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE->enterIntvBefore(Use); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); - assert(SegStart < LastSplitPoint && "Impossible split point"); - SE->useIntv(SegStart, Stop); - continue; - } - } - - // Interference is after the last use. 
- DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); + if (IntvIn && IntvOut) + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + else if (IntvIn) + SE->splitRegInBlock(BI, IntvIn, IntfIn); + else + SE->splitRegOutBlock(BI, IntvOut, IntfOut); } - // Now all defs leading to live bundles are handled, do everything else. - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Handle live-through blocks. The relevant live-through blocks are stored in + // the ActiveBlocks list with each candidate. We need to filter out + // duplicates. + BitVector Todo = SA->getThroughBlocks(); + for (unsigned c = 0; c != UsedCands.size(); ++c) { + ArrayRef Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + unsigned Number = Blocks[i]; + if (!Todo.test(Number)) + continue; + Todo.reset(Number); - // Is the register live-in? - if (!BI.LiveIn || !RegIn) - continue; + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; - // We have an incoming register. Check for interference. - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ')'); + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } - // Check interference entering the block. 
- if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", killed in block.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); - continue; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); } - if (!RegOut) { - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - // Block is live-through, but exit bundle is on the stack. - // Spill immediately after the last use. - if (BI.LastUse < LastSplitPoint) { - DEBUG(dbgs() << ", uses, stack-out.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); - continue; - } - // The last use is after the last split point, it is probably an - // indirect jump. - DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); - SE->useIntv(Start, SegEnd); - // Run a double interval from the split to the last use. - // This makes it possible to spill the complement without affecting the - // indirect branch. - SE->overlapIntv(SegEnd, BI.LastUse); + if (!IntvIn && !IntvOut) continue; - } - // Register is live-through. - DEBUG(dbgs() << ", uses, live-through.\n"); - SE->useIntv(Start, Stop); - continue; + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); } + } + + ++NumGlobalSplits; - // Block has interference. - DEBUG(dbgs() << ", interference from " << Intf.first()); + SmallVector IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(Reg, LREdit.regs()); - if (!BI.LiveThrough && Intf.first() >= BI.Kill) { - // The interference doesn't reach the outgoing segment. 
- DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE->useIntv(Start, BI.Kill); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumLiveBlocks(); + + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &Reg = *LREdit.get(i); + + // Ignore old intervals from DCE. + if (getStage(Reg) != RS_New) continue; - } - if (Intf.first().getBaseIndex() > BI.FirstUse) { - // There are interference-free uses at the beginning of the block. - // Find the last use that can get the register. - SmallVectorImpl::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.first().getBaseIndex()); - assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); - SlotIndex Use = (--UI)->getBoundaryIndex(); - DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE->leaveIntvAfter(Use); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); - SE->useIntv(Start, SegEnd); + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + setStage(Reg, RS_Spill); continue; } - // Interference is before the first use. - DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); - } - - // Handle live-through blocks. 
- ArrayRef ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - DEBUG(dbgs() << "Live through BB#" << Number << '\n'); - if (RegIn && RegOut) { - Intf.moveToBlock(Number); - if (!Intf.hasInterference()) { - SE->useIntv(Indexes->getMBBStartIdx(Number), - Indexes->getMBBEndIdx(Number)); - continue; + // Global intervals. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] < NumGlobalIntvs) { + if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safe guard against looping. + setStage(Reg, RS_Split2); } + continue; } - MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (RegIn) - SE->leaveIntvAtTop(*MBB); - if (RegOut) - SE->enterIntvAtEnd(*MBB); - } - - SE->closeIntv(); - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. - SE->finish(); - ++NumGlobalSplits; + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE. 
+ } if (VerifyEnabled) MF->verify(this, "After splitting live range around region"); @@ -791,60 +1078,178 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - BitVector LiveBundles, BestBundles; - float BestCost = 0; - unsigned BestReg = 0; + unsigned NumCands = 0; + unsigned BestCand = NoCand; + float BestCost; + SmallVector UsedCands; + + // Check if we can split this live range around a compact region. + bool HasCompact = calcCompactRegion(GlobalCand.front()); + if (HasCompact) { + // Yes, keep GlobalCand[0] as the compact region candidate. + NumCands = 1; + BestCost = HUGE_VALF; + } else { + // No benefit from the compact region, our fallback will be per-block + // splitting. Make sure we find a solution that is cheaper than spilling. + BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + } Order.rewind(); - for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { - if (GlobalCand.size() <= Cand) - GlobalCand.resize(Cand+1); - GlobalCand[Cand].PhysReg = PhysReg; + while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). 
+ if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand || !GlobalCand[i].PhysReg) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; + } + + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); - SpillPlacer->prepare(LiveBundles); + SpillPlacer->prepare(Cand.LiveBundles); float Cost; - if (!addSplitConstraints(PhysReg, Cost)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bias\n"); + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tbiased = " - << SpillPlacer->getPositiveNodes() << ", static = " << Cost); - if (BestReg && Cost >= BestCost) { - DEBUG(dbgs() << " worse than " << PrintReg(BestReg, TRI) << '\n'); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); continue; } + growRegion(Cand); SpillPlacer->finish(); // No live bundles, defer to splitSingleBlocks(). 
- if (!LiveBundles.any()) { + if (!Cand.LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(PhysReg, LiveBundles); + Cost += calcGlobalSplitCost(Cand); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = 0.98f * Cost; // Prevent rounding effects. - BestBundles.swap(LiveBundles); + if (Cost < BestCost) { + BestCand = NumCands; + BestCost = Hysteresis * Cost; // Prevent rounding effects. } + ++NumCands; } - if (!BestReg) + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Region); + // Prepare split editor. + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit, SplitSpillMode); + + // Assign all edge bundles to the preferred candidate, or NoCand. + BundleCand.assign(Bundles->getNumBundles(), NoCand); + + // Assign bundles for the best candidate region. + if (BestCand != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[BestCand]; + if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { + UsedCands.push_back(BestCand); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + // Assign bundles for the compact region. 
+ if (HasCompact) { + GlobalSplitCandidate &Cand = GlobalCand.front(); + assert(!Cand.PhysReg && "Compact region has no physreg"); + if (unsigned B = Cand.getBundles(BundleCand, 0)) { + UsedCands.push_back(0); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " + << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + splitAroundRegion(LREdit, UsedCands); return 0; } +//===----------------------------------------------------------------------===// +// Per-Block Splitting +//===----------------------------------------------------------------------===// + +/// tryBlockSplit - Split a global live range around every block with uses. This +/// creates a lot of local live ranges, that will be split by tryLocalSplit if +/// they don't allocate. +unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); + unsigned Reg = VirtReg.reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit, SplitSpillMode); + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + } + // No blocks were split. + if (LREdit.empty()) + return 0; + + // We did split for some blocks. + SmallVector IntvMap; + SE->finish(&IntvMap); + + // Tell LiveDebugVariables about the new ranges. + DebugVars->splitRegister(Reg, LREdit.regs()); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Sort out the new intervals created by splitting. The remainder interval + // goes straight to spilling, the new local ranges get to stay RS_New. 
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &LI = *LREdit.get(i); + if (getStage(LI) == RS_New && IntvMap[i] == 0) + setStage(LI, RS_Spill); + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + return 0; +} + //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// @@ -863,8 +1268,10 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. - SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; - SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + SlotIndex StartIdx = + BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; + SlotIndex StopIdx = + BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; GapWeight.assign(NumGaps, 0.0f); @@ -874,8 +1281,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, .checkInterference()) continue; - // We know that VirtReg is a continuous interval from FirstUse to LastUse, - // so we don't need InterferenceQuery. + // We know that VirtReg is a continuous interval from FirstInstr to + // LastInstr, so we don't need InterferenceQuery. // // Interference that overlaps an instruction is counted in both gaps // surrounding the instruction. The exception is interference before @@ -903,47 +1310,6 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, } } -/// getPrevMappedIndex - Return the slot index of the last non-copy instruction -/// before MI that has a slot index. If MI is the first mapped instruction in -/// its block, return the block start index instead. 
-/// -SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { - assert(MI && "Missing MachineInstr"); - const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; - while (I != B) - if (!(--I)->isDebugValue() && !I->isCopy()) - return Indexes->getInstructionIndex(I); - return Indexes->getMBBStartIdx(MBB); -} - -/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous -/// real non-copy instruction for each instruction in SA->UseSlots. -/// -void RAGreedy::calcPrevSlots() { - const SmallVectorImpl &Uses = SA->UseSlots; - PrevSlot.clear(); - PrevSlot.reserve(Uses.size()); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); - PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); - } -} - -/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may -/// be beneficial to split before UseSlots[i]. -/// -/// 0 is always a valid split point -unsigned RAGreedy::nextSplitPoint(unsigned i) { - const SmallVectorImpl &Uses = SA->UseSlots; - const unsigned Size = Uses.size(); - assert(i != Size && "No split points after the end"); - // Allow split before i when Uses[i] is not adjacent to the previous use. - while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) - ; - return i; -} - /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only /// basic block. /// @@ -956,8 +1322,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // while only covering a single block - A phi-def can use undef values from // predecessors, and the block could be a single-block loop. // We don't bother doing anything clever about such a case, we simply assume - // that the interval is continuous from FirstUse to LastUse. We should make - // sure that we don't do anything illegal to such an interval, though. 
+ // that the interval is continuous from FirstInstr to LastInstr. We should + // make sure that we don't do anything illegal to such an interval, though. const SmallVectorImpl &Uses = SA->UseSlots; if (Uses.size() <= 2) @@ -971,11 +1337,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, dbgs() << '\n'; }); - // For every use, find the previous mapped non-copy instruction. - // We use this to detect valid split points, and to estimate new interval - // sizes. - calcPrevSlots(); + // Since we allow local split results to be split again, there is a risk of + // creating infinite loops. It is tempting to require that the new live + // ranges have less instructions than the original. That would guarantee + // convergence, but it is too strict. A live range with 3 instructions can be + // split 2+3 (including the COPY), and we want to allow that. + // + // Instead we use these rules: + // + // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the + // noop split, of course). + // 2. Require progress be made for ranges with getStage() == RS_Split2. All + // the new ranges must have fewer instructions than before the split. + // 3. New ranges with the same number of instructions are marked RS_Split2, + // smaller ranges are marked RS_New. + // + // These rules allow a 3 -> 2+3 split once, which we need. They also prevent + // excessive splitting and infinite loops. + // + bool ProgressRequired = getStage(VirtReg) >= RS_Split2; + // Best split candidate. unsigned BestBefore = NumGaps; unsigned BestAfter = 0; float BestDiff = 0; @@ -993,13 +1375,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // The new spill weight must be larger than any gap interference. // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. 
- unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + unsigned SplitBefore = 0, SplitAfter = 1; // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (unsigned i = 1; i != SplitAfter; ++i) - MaxGap = std::max(MaxGap, GapWeight[i]); for (;;) { // Live before/after split? @@ -1017,41 +1397,31 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } // Should the interval be extended or shrunk? bool Shrink = true; - if (MaxGap < HUGE_VALF) { - // Estimate the new spill weight. - // - // Each instruction reads and writes the register, except the first - // instr doesn't read when !FirstLive, and the last instr doesn't write - // when !LastLive. - // - // We will be inserting copies before and after, so the total number of - // reads and writes is 2 * EstUses. - // - const unsigned EstUses = 2*(SplitAfter - SplitBefore) + - 2*(LiveBefore + LiveAfter); - // Try to guess the size of the new interval. This should be trivial, - // but the slot index of an inserted copy can be a lot smaller than the - // instruction it is inserted before if there are many dead indexes - // between them. - // - // We measure the distance from the instruction before SplitBefore to - // get a conservative estimate. - // - // The final distance can still be different if inserting copies - // triggers a slot index renumbering. + // How many gaps would the new range have? + unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; + + // Legally, without causing looping? + bool Legal = !ProgressRequired || NewGaps < NumGaps; + + if (Legal && MaxGap < HUGE_VALF) { + // Estimate the new spill weight. Each instruction reads or writes the + // register. Conservatively assume there are no read-modify-write + // instructions. 
// - const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, - PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Try to guess the size of the new interval. + const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter)*SlotIndex::InstrDist); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. - float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1060,8 +1430,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Try to shrink. if (Shrink) { - SplitBefore = nextSplitPoint(SplitBefore); - if (SplitBefore < SplitAfter) { + if (++SplitBefore < SplitAfter) { DEBUG(dbgs() << " shrink\n"); // Recompute the max when necessary. 
if (GapWeight[SplitBefore - 1] >= MaxGap) { @@ -1081,10 +1450,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } DEBUG(dbgs() << " extend\n"); - for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; - SplitAfter != e; ++SplitAfter) - MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); - continue; + MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); } } @@ -1103,9 +1469,26 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); SE->useIntv(SegStart, SegStop); - SE->closeIntv(); - SE->finish(); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); + SmallVector IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + + // If the new range has the same number of instructions as before, mark it as + // RS_Split2 so the next split will be forced to make progress. Otherwise, + // leave the new intervals as RS_New so they can compete. + bool LiveBefore = BestBefore != 0 || BI.LiveIn; + bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; + unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; + if (NewGaps >= NumGaps) { + DEBUG(dbgs() << "Tagging non-progress ranges: "); + assert(!ProgressRequired && "Didn't make progress when it was required."); + for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) + if (IntvMap[i] == 1) { + setStage(*LREdit.get(i), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + } + DEBUG(dbgs() << '\n'); + } ++NumLocalSplits; return 0; @@ -1120,6 +1503,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl&NewVRegs) { + // Ranges must be Split2 or less. 
+ if (getStage(VirtReg) >= RS_Spill) + return 0; + // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); @@ -1129,36 +1516,30 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); - // Don't iterate global splitting. - // Move straight to spilling if this range was produced by a global split. - LiveRangeStage Stage = getStage(VirtReg); - if (Stage >= RS_Block) - return 0; - SA->analyze(&VirtReg); - // First try to split around a region spanning multiple blocks. - if (Stage < RS_Region) { - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) + // FIXME: SplitAnalysis may repair broken live ranges coming from the + // coalescer. That may cause the range to become allocatable which means that + // tryRegionSplit won't be making progress. This check should be replaced with + // an assertion when the coalescer is fixed. + if (SA->didRepairRange()) { + // VirtReg has changed, so all cached queries are invalid. + invalidateVirtRegs(); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } - // Then isolate blocks with multiple uses. - if (Stage < RS_Block) { - SplitAnalysis::BlockPtrSet Blocks; - if (SA->getMultiUseBlocks(Blocks)) { - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - SE->splitSingleBlocks(Blocks); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block); - if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); - } + // First try to split around a region spanning multiple blocks. RS_Split2 + // ranges already made dubious progress with region splitting, so they go + // straight to single block splitting. 
+ if (getStage(VirtReg) < RS_Split2) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; } - // Don't assign any physregs. - return 0; + // Then isolate blocks. + return tryBlockSplit(VirtReg, Order, NewVRegs); } @@ -1169,29 +1550,37 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; + LiveRangeStage Stage = getStage(VirtReg); + DEBUG(dbgs() << StageName[Stage] + << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + + // Try to evict a less worthy live range, but only for ranges from the primary + // queue. The RS_Split ranges already failed to do this, and they should not + // get a second chance until they have been split. + if (Stage != RS_Split) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. 
- LiveRangeStage Stage = getStage(VirtReg); - if (Stage == RS_First) { - LRStage[VirtReg.reg] = RS_Second; + if (Stage < RS_Split) { + setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; } - assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // If we couldn't allocate a register from spilling, there is probably some + // invalid inline assembly. The base class will report it. + if (Stage >= RS_Done || !VirtReg.isSpillable()) + return ~0u; // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1202,7 +1591,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); LiveRangeEdit LRE(VirtReg, NewVRegs, this); spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); if (VerifyEnabled) MF->verify(this, "After spilling"); @@ -1224,18 +1613,19 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis()); Indexes = &getAnalysis(); DomTree = &getAnalysis(); - ReservedRegs = TRI->getReservedRegs(*MF); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis(); - LoopRanges = &getAnalysis(); Bundles = &getAnalysis(); SpillPlacer = &getAnalysis(); + DebugVars = &getAnalysis(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); - LRStage.clear(); - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.clear(); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + NextCascade = 1; IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); addMBBLiveIns(MF); @@ -1248,7 +1638,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { } // Write out new DBG_VALUE instructions. 
- getAnalysis().emitDebugValues(VRM); + { + NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled); + DebugVars->emitDebugValues(VRM); + } // The pass output is in VirtRegMap. Release all the transient data. releaseMemory();