X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=06f69c1e0d16fc6199ad961765807d19556321b0;hb=ebd7eabca4c090175b71f221e880f8bd937a4523;hp=4728a050b17a9d7c535a3945c3a4b2516093cfb9;hpb=4fcfcf498436533ea9bc59404d10751bc76cfd2c;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 4728a050b17..06f69c1e0d1 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,29 +16,28 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" +#include "LiveRegMatrix.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" -#include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineLoopRanges.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -52,6 +51,15 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); +static cl::opt +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -65,10 +73,8 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; - LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; - MachineLoopRanges *LoopRanges; EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; @@ -92,12 +98,26 @@ class RAGreedy : public MachineFunctionPass, // range splitting algorithm terminates, something that is otherwise hard to // ensure. enum LiveRangeStage { - RS_New, ///< Never seen before. - RS_First, ///< First time in the queue. - RS_Second, ///< Second time in the queue. - RS_Global, ///< Produced by global splitting. - RS_Local, ///< Produced by local splitting. - RS_Spill ///< Produced by spilling. + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. + RS_Spill, + + /// There is nothing more we can do to this live range. Abort compilation + /// if it can't be assigned. + RS_Done }; static const char *const StageName[]; @@ -159,17 +179,38 @@ class RAGreedy : public MachineFunctionPass, /// Global live range splitting candidate info. struct GlobalSplitCandidate { + // Register intended for assignment, or 0. unsigned PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. BitVector LiveBundles; SmallVector ActiveBlocks; void reset(InterferenceCache &Cache, unsigned Reg) { PhysReg = Reg; + IntvIdx = 0; Intf.setPhysReg(Cache, Reg); LiveBundles.clear(); ActiveBlocks.clear(); } + + // Set B[i] = C for every live bundle where B[i] was NoCand. + unsigned getBundles(SmallVectorImpl &B, unsigned C) { + unsigned Count = 0; + for (int i = LiveBundles.find_first(); i >= 0; + i = LiveBundles.find_next(i)) + if (B[i] == NoCand) { + B[i] = C; + Count++; + } + return Count; + } }; /// Candidate info for for each PhysReg in AllocationOrder. @@ -177,6 +218,12 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector GlobalCand; + enum { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector BundleCand; + public: RAGreedy(); @@ -200,7 +247,6 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -210,8 +256,8 @@ private: void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); void growRegion(GlobalSplitCandidate &Cand); float calcGlobalSplitCost(GlobalSplitCandidate&); - void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, - SmallVectorImpl&); + bool calcCompactRegion(GlobalSplitCandidate&); + void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); @@ -224,6 +270,10 @@ private: SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); + unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, @@ -235,12 +285,12 @@ char RAGreedy::ID = 0; #ifndef NDEBUG const char *const RAGreedy::StageName[] = { - "RS_New", - "RS_First", - "RS_Second", - "RS_Global", - "RS_Local", - "RS_Spill" + "RS_New", + "RS_Assign", + "RS_Split", + "RS_Split2", + "RS_Spill", + "RS_Done" }; #endif @@ -258,14 +308,14 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } @@ -275,24 +325,22 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitive(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -303,14 +351,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { - if (unsigned PhysReg = VRM->getPhys(VirtReg)) { - unassign(LIS->getInterval(VirtReg), PhysReg); + if (VRM->hasPhys(VirtReg)) { + Matrix->unassign(LIS->getInterval(VirtReg)); return true; } // Unassigned virtreg is probably in the priority queue. @@ -319,20 +362,25 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (!PhysReg) + if (!VRM->hasPhys(VirtReg)) return; // Register is assigned, put it back on the queue for reassignment. LiveInterval &LI = LIS->getInterval(VirtReg); - unassign(LI, PhysReg); + Matrix->unassign(LI); enqueue(&LI); } void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // Cloning a register we haven't even heard about yet? Just ignore it. + if (!ExtraRegInfo.inBounds(Old)) + return; + // LRE may clone a virtual register because dead code elimination causes it to - // be split into connected components. Ensure that the new register gets the + // be split into connected components. The new components are much smaller + // than the original, so they should get a new chance at being assigned. // same stage as the parent. + ExtraRegInfo[Old].Stage = RS_Assign; ExtraRegInfo.grow(New); ExtraRegInfo[New] = ExtraRegInfo[Old]; } @@ -341,7 +389,6 @@ void RAGreedy::releaseMemory() { SpillerInstance.reset(0); ExtraRegInfo.clear(); GlobalCand.clear(); - RegAllocBase::releaseMemory(); } void RAGreedy::enqueue(LiveInterval *LI) { @@ -355,16 +402,15 @@ void RAGreedy::enqueue(LiveInterval *LI) { ExtraRegInfo.grow(Reg); if (ExtraRegInfo[Reg].Stage == RS_New) - ExtraRegInfo[Reg].Stage = RS_First; + ExtraRegInfo[Reg].Stage = RS_Assign; - if (ExtraRegInfo[Reg].Stage == RS_Second) + if (ExtraRegInfo[Reg].Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until - // everything else has been allocated. Long ranges are allocated last so - // they are split against realistic interference. - Prio = (1u << 31) - Size; - else { - // Everything else is allocated in long->short order. Long ranges that don't - // fit should be spilled ASAP so they don't create interference. + // everything else has been allocated. + Prio = Size; + } else { + // Everything is allocated in long->short order. Long ranges that don't fit + // should be spilled (or split) ASAP so they don't create interference. Prio = (1u << 31) + Size; // Boost ranges that have a physical register hint. @@ -372,13 +418,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -395,7 +441,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, Order.rewind(); unsigned PhysReg; while ((PhysReg = Order.next())) - if (!checkPhysRegInterference(VirtReg, PhysReg)) + if (!Matrix->checkInterference(VirtReg, PhysReg)) break; if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -447,7 +493,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, /// @param BreaksHint True when B is already assigned to its preferred register. bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, LiveInterval &B, bool BreaksHint) { - bool CanSplit = getStage(B) <= RS_Second; + bool CanSplit = getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be // split. @@ -462,12 +508,16 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. -/// @prarm IsHint True when PhysReg is VirtReg's preferred register. +/// @param IsHint True when PhysReg is VirtReg's preferred register. /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, bool IsHint, EvictionCost &MaxCost) { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -480,8 +530,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) return false; @@ -489,15 +539,21 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) - return false; + assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + "Only expecting virtual register interference from query"); // Never evict spill products. They cannot split or spill. - if (getStage(*Intf) == RS_Spill) + if (getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These // urgent live ranges get to evict almost anything. - bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable(); + // + // Also allow urgent evictions of unspillable ranges from a strictly + // larger allocation order. + bool Urgent = !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); // Only evict older cascades or live ranges without a cascade. unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; if (Cascade <= IntfCascade) { @@ -538,19 +594,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + + // Collect all interfering virtregs first. + SmallVector Intfs; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || - VirtReg.isSpillable() < Intf->isSpillable()) && - "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; - ++NumEvicted; - NewVRegs.push_back(Intf); - } + ArrayRef IVR = Q.interferingVRegs(); + Intfs.append(IVR.begin(), IVR.end()); + } + + // Evict them second. This will invalidate the queries. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval *Intf = Intfs[i]; + // The same VirtReg may be present in multiple RegUnits. Skip duplicates. + if (!VRM->hasPhys(Intf->reg)) + continue; + Matrix->unassign(*Intf); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); } } @@ -632,6 +698,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.ChangesValue = BI.FirstDef; if (!Intf.hasInterference()) continue; @@ -643,9 +710,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, if (BI.LiveIn) { if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) BC.Entry = SpillPlacement::MustSpill, ++Ins; - else if (Intf.first() < BI.FirstUse) + else if (Intf.first() < BI.FirstInstr) BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < BI.LastUse) + else if (Intf.first() < BI.LastInstr) ++Ins; } @@ -653,9 +720,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, if (BI.LiveOut) { if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) BC.Exit = SpillPlacement::MustSpill, ++Ins; - else if (Intf.last() > BI.LastUse) + else if (Intf.last() > BI.LastInstr) BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > BI.FirstUse) + else if (Intf.last() > BI.FirstInstr) ++Ins; } @@ -689,7 +756,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, assert(T < GroupSize && "Array overflow"); TBS[T] = Number; if (++T == GroupSize) { - SpillPlacer->addLinks(ArrayRef(TBS, T)); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); T = 0; } continue; @@ -719,7 +786,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, ArrayRef Array(BCS, B); SpillPlacer->addConstraints(Array); - SpillPlacer->addLinks(ArrayRef(TBS, T)); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); } void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { @@ -754,8 +821,16 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Any new blocks to add? if (ActiveBlocks.size() == AddedTo) break; - addThroughConstraints(Cand.Intf, - ArrayRef(ActiveBlocks).slice(AddedTo)); + + // Compute through constraints from the interference, or assume that all + // through blocks prefer spilling when forming compact regions. + ArrayRef NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + if (Cand.PhysReg) + addThroughConstraints(Cand.Intf, NewBlocks); + else + // Provide a strong negative bias on through blocks to prevent unwanted + // liveness on loop backedges. + SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); AddedTo = ActiveBlocks.size(); // Perhaps iterating can enable more bundles? @@ -764,11 +839,55 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { DEBUG(dbgs() << ", v=" << Visited); } +/// calcCompactRegion - Compute the set of edge bundles that should be live +/// when splitting the current live range into compact regions. Compact +/// regions can be computed without looking at interference. They are the +/// regions formed by removing all the live-through blocks from the live range. +/// +/// Returns false if the current live range is already compact, or if the +/// compact regions would form single block regions anyway. +bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { + // Without any through blocks, the live range is already compact. + if (!SA->getNumThroughBlocks()) + return false; + + // Compact regions don't correspond to any physreg. + Cand.reset(IntfCache, 0); + + DEBUG(dbgs() << "Compact region bundles"); + + // Use the spill placer to determine the live bundles. GrowRegion pretends + // that all the through blocks have interference when PhysReg is unset. + SpillPlacer->prepare(Cand.LiveBundles); + + // The static split cost will be zero since Cand.Intf reports no interference. + float Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + growRegion(Cand); + SpillPlacer->finish(); + + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + DEBUG({ + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + return true; +} + /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. float RAGreedy::calcSpillCost() { float Cost = 0; - const LiveInterval &LI = SA->getParent(); ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -777,16 +896,8 @@ float RAGreedy::calcSpillCost() { Cost += SpillPlacer->getBlockFrequency(Number); // Unless the value is redefined in the block. - if (BI.LiveIn && BI.LiveOut) { - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(Number); - LiveInterval::const_iterator I = LI.find(Start); - assert(I != LI.end() && "Expected live-in value"); - // Is there a different live-out value? If so, we need an extra spill - // instruction. - if (I->end < Stop) - Cost += SpillPlacer->getBlockFrequency(Number); - } + if (BI.LiveIn && BI.LiveOut && BI.FirstDef) + Cost += SpillPlacer->getBlockFrequency(Number); } return Cost; } @@ -833,372 +944,115 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { return GlobalCost; } -/// splitAroundRegion - Split VirtReg around the region determined by -/// LiveBundles. Make an effort to avoid interference from PhysReg. +/// splitAroundRegion - Split the current live range around the regions +/// determined by BundleCand and GlobalCand. /// -/// The 'register' interval is going to contain as many uses as possible while -/// avoiding interference. The 'stack' interval is the complement constructed by -/// SplitEditor. It will contain the rest. +/// Before calling this function, GlobalCand and BundleCand must be initialized +/// so each bundle is assigned to a valid candidate, or NoCand for the +/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor +/// objects must be initialized for the current live range, and intervals +/// created for the used candidates. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, - GlobalSplitCandidate &Cand, - SmallVectorImpl &NewVRegs) { - const BitVector &LiveBundles = Cand.LiveBundles; - - DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) - << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) - dbgs() << " EB#" << i; - dbgs() << ".\n"; - }); - - InterferenceCache::Cursor &Intf = Cand.Intf; - - // FIXME: We need cache reference counts to guarantee that Intf hasn't been - // clobbered. - Intf.setPhysReg(IntfCache, Cand.PhysReg); - - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - - // Create the main cross-block interval. - const unsigned MainIntv = SE->openIntv(); +/// @param LREdit The LiveRangeEdit object handling the current split. +/// @param UsedCands List of used GlobalCand entries. Every BundleCand value +/// must appear in this list. +void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, + ArrayRef UsedCands) { + // These are the intervals created for new global ranges. We may create more + // intervals for local ranges. + const unsigned NumGlobalIntvs = LREdit.size(); + DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); + assert(NumGlobalIntvs && "No global intervals configured"); + + // Isolate even single instructions when dealing with a proper sub-class. + // That guarantees register class inflation for the stack interval because it + // is all copies. + unsigned Reg = SA->getParent().reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); // First handle all the blocks with uses. ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = BI.LiveIn && - LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = BI.LiveOut && - LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; - - // Create separate intervals for isolated blocks with multiple uses. - // - // |---o---o---| Enter and leave on the stack. - // ____-----____ Create local interval for uses. - // - // | o---o---| Defined in block, leave on stack. - // -----____ Create local interval for uses. - // - // |---o---x | Enter on stack, killed in block. - // ____----- Create local interval for uses. - // - if (!RegIn && !RegOut) { - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); - if (!BI.isOneInstr()) { - SE->splitSingleBlock(BI); - SE->selectIntv(MainIntv); + unsigned Number = BI.MBB->getNumber(); + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + if (BI.LiveIn) { + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); } - continue; } - - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << (BI.LiveIn ? (RegIn ? " => " : " -> ") : " ") - << "BB#" << BI.MBB->getNumber() - << (BI.LiveOut ? (RegOut ? " => " : " -> ") : " ") - << " EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ") uses [" << BI.FirstUse << ';' << BI.LastUse - << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); - - // The interference interval should either be invalid or overlap MBB. - assert((!Intf.hasInterference() || Intf.first() < Stop) - && "Bad interference"); - assert((!Intf.hasInterference() || Intf.last() > Start) - && "Bad interference"); - - // We are now ready to decide where to split in the current block. There - // are many variables guiding the decision: - // - // - RegIn / RegOut: The global splitting algorithm's decisions for our - // ingoing and outgoing bundles. - // - // - BI.BlockIn / BI.BlockOut: Is the live range live-in and/or live-out - // from this block. - // - // - Intf.hasInterference(): Is there interference in this block. - // - // - Intf.first() / Inft.last(): The range of interference. - // - // The live range should be split such that MainIntv is live-in when RegIn - // is set, and live-out when RegOut is set. MainIntv should never overlap - // the interference, and the stack interval should never have more than one - // use per block. - - // No splits can be inserted after LastSplitPoint, overlap instead. - SlotIndex LastSplitPoint = Stop; - if (BI.LiveOut) - LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - - // At this point, we know that either RegIn or RegOut is set. We dealt with - // the all-stack case above. - - // Blocks without interference are relatively easy. - if (!Intf.hasInterference()) { - DEBUG(dbgs() << ", no interference.\n"); - SE->selectIntv(MainIntv); - // The easiest case has MainIntv live through. - // - // |---o---o---| Live-in, live-out. - // ============= Use MainIntv everywhere. - // - SlotIndex From = Start, To = Stop; - - // Block entry. Reload before the first use if MainIntv is not live-in. - // - // |---o-- Enter on stack. - // ____=== Reload before first use. - // - // | o-- Defined in block. - // === Use MainIntv from def. - // - if (!RegIn) - From = SE->enterIntvBefore(BI.FirstUse); - - // Block exit. Handle cases where MainIntv is not live-out. - if (!BI.LiveOut) - // - // --x | Killed in block. - // === Use MainIntv up to kill. - // - To = SE->leaveIntvAfter(BI.LastUse); - else if (!RegOut) { - // - // --o---| Live-out on stack. - // ===____ Use MainIntv up to last use, switch to stack. - // - // -----o| Live-out on stack, last use after last split point. - // ====== Extend MainIntv to last use, overlapping. - // \____ Copy to stack interval before last split point. - // - if (BI.LastUse < LastSplitPoint) - To = SE->leaveIntvAfter(BI.LastUse); - else { - // The last use is after the last split point, it is probably an - // indirect branch. - To = SE->leaveIntvBefore(LastSplitPoint); - // Run a double interval from the split to the last use. This makes - // it possible to spill the complement without affecting the indirect - // branch. - SE->overlapIntv(To, BI.LastUse); - } + if (BI.LiveOut) { + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); } - - // Paint in MainIntv liveness for this block. - SE->useIntv(From, To); - continue; - } - - // We are now looking at a block with interference, and we know that either - // RegIn or RegOut is set. - assert(Intf.hasInterference() && (RegIn || RegOut) && "Bad invariant"); - - // If the live range is not live through the block, it is possible that the - // interference doesn't even overlap. Deal with those cases first. Since - // no copy instructions are required, we can tolerate interference starting - // or ending at the same instruction that kills or defines our live range. - - // Live-in, killed before interference. - // - // ~~~ Interference after kill. - // |---o---x | Killed in block. - // ========= Use MainIntv everywhere. - // - if (RegIn && !BI.LiveOut && BI.LastUse <= Intf.first()) { - DEBUG(dbgs() << ", live-in, killed before interference.\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvAfter(BI.LastUse); - SE->useIntv(Start, To); - continue; } - // Live-out, defined after interference. - // - // ~~~ Interference before def. - // | o---o---| Defined in block. - // ========= Use MainIntv everywhere. - // - if (RegOut && !BI.LiveIn && BI.FirstUse >= Intf.last()) { - DEBUG(dbgs() << ", live-out, defined after interference.\n"); - SE->selectIntv(MainIntv); - SlotIndex From = SE->enterIntvBefore(BI.FirstUse); - SE->useIntv(From, Stop); + // Create separate intervals for isolated blocks with multiple uses. + if (!IntvIn && !IntvOut) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); continue; } - // The interference is now known to overlap the live range, but it may - // still be easy to avoid if all the interference is on one side of the - // uses, and we enter or leave on the stack. + if (IntvIn && IntvOut) + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + else if (IntvIn) + SE->splitRegInBlock(BI, IntvIn, IntfIn); + else + SE->splitRegOutBlock(BI, IntvOut, IntfOut); + } - // Live-out on stack, interference after last use. - // - // ~~~ Interference after last use. - // |---o---o---| Live-out on stack. - // =========____ Leave MainIntv after last use. - // - // ~ Interference after last use. - // |---o---o--o| Live-out on stack, late last use. - // ============ Copy to stack after LSP, overlap MainIntv. - // \_____ Stack interval is live-out. - // - if (!RegOut && Intf.first() > BI.LastUse.getBoundaryIndex()) { - assert(RegIn && "Stack-in, stack-out should already be handled"); - if (BI.LastUse < LastSplitPoint) { - DEBUG(dbgs() << ", live-in, stack-out, interference after last use.\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvAfter(BI.LastUse); - assert(To <= Intf.first() && "Expected to avoid interference"); - SE->useIntv(Start, To); - } else { - DEBUG(dbgs() << ", live-in, stack-out, avoid last split point\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvBefore(LastSplitPoint); - assert(To <= Intf.first() && "Expected to avoid interference"); - SE->overlapIntv(To, BI.LastUse); - SE->useIntv(Start, To); - } - continue; - } + // Handle live-through blocks. The relevant live-through blocks are stored in + // the ActiveBlocks list with each candidate. We need to filter out + // duplicates. + BitVector Todo = SA->getThroughBlocks(); + for (unsigned c = 0; c != UsedCands.size(); ++c) { + ArrayRef Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + unsigned Number = Blocks[i]; + if (!Todo.test(Number)) + continue; + Todo.reset(Number); - // Live-in on stack, interference before first use. - // - // ~~~ Interference before first use. - // |---o---o---| Live-in on stack. - // ____========= Enter MainIntv before first use. - // - if (!RegIn && Intf.last() < BI.FirstUse.getBaseIndex()) { - assert(RegOut && "Stack-in, stack-out should already be handled"); - DEBUG(dbgs() << ", stack-in, interference before first use.\n"); - SE->selectIntv(MainIntv); - SlotIndex From = SE->enterIntvBefore(BI.FirstUse); - assert(From >= Intf.last() && "Expected to avoid interference"); - SE->useIntv(From, Stop); - continue; - } + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; - // The interference is overlapping somewhere we wanted to use MainIntv. That - // means we need to create a local interval that can be allocated a - // different register. - unsigned LocalIntv = SE->openIntv(); - DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); - - // We may be creating copies directly between MainIntv and LocalIntv, - // bypassing the stack interval. When we do that, we should never use the - // leaveIntv* methods as they define values in the stack interval. By - // starting from the end of the block and working our way backwards, we can - // get by with only enterIntv* methods. - // - // When selecting split points, we generally try to maximize the stack - // interval as long at it contains no uses, maximize the main interval as - // long as it doesn't overlap interference, and minimize the local interval - // that we don't know how to allocate yet. - - // Handle the block exit, set Pos to the first handled slot. - SlotIndex Pos = BI.LastUse; - if (RegOut) { - assert(Intf.last() < LastSplitPoint && "Cannot be live-out in register"); - // Create a snippet of MainIntv that is live-out. - // - // ~~~ Interference overlapping uses. - // --o---| Live-out in MainIntv. - // ----=== Switch from LocalIntv to MainIntv after interference. - // - SE->selectIntv(MainIntv); - Pos = SE->enterIntvAfter(Intf.last()); - assert(Pos >= Intf.last() && "Expected to avoid interference"); - SE->useIntv(Pos, Stop); - SE->selectIntv(LocalIntv); - } else if (BI.LiveOut) { - if (BI.LastUse < LastSplitPoint) { - // Live-out on the stack. - // - // ~~~ Interference overlapping uses. - // --o---| Live-out on stack. - // ---____ Switch from LocalIntv to stack after last use. - // - Pos = SE->leaveIntvAfter(BI.LastUse); - } else { - // Live-out on the stack, last use after last split point. - // - // ~~~ Interference overlapping uses. - // --o--o| Live-out on stack, late use. - // ------ Copy to stack before LSP, overlap LocalIntv. - // \__ - // - Pos = SE->leaveIntvBefore(LastSplitPoint); - // We need to overlap LocalIntv so it can reach LastUse. - SE->overlapIntv(Pos, BI.LastUse); + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); } - } - - // When not live-out, leave Pos at LastUse. We have handled everything from - // Pos to Stop. Find the starting point for LocalIntv. - assert(SE->currentIntv() == LocalIntv && "Expecting local interval"); - - if (RegIn) { - assert(Start < Intf.first() && "Cannot be live-in with interference"); - // Live-in in MainIntv, only use LocalIntv for interference. - // - // ~~~ Interference overlapping uses. - // |---o-- Live-in in MainIntv. - // ====--- Switch to LocalIntv before interference. - // - SlotIndex Switch = SE->enterIntvBefore(std::min(Pos, Intf.first())); - assert(Switch <= Intf.first() && "Expected to avoid interference"); - SE->useIntv(Switch, Pos); - SE->selectIntv(MainIntv); - SE->useIntv(Start, Switch); - } else { - // Live-in on stack, enter LocalIntv before first use. - // - // ~~~ Interference overlapping uses. - // |---o-- Live-in in MainIntv. - // ____--- Reload to LocalIntv before interference. - // - // Defined in block. - // - // ~~~ Interference overlapping uses. - // | o-- Defined in block. - // --- Begin LocalIntv at first use. - // - SlotIndex Switch = SE->enterIntvBefore(std::min(Pos, BI.FirstUse)); - SE->useIntv(Switch, Pos); - } - } - // Handle live-through blocks. - SE->selectIntv(MainIntv); - for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { - unsigned Number = Cand.ActiveBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - DEBUG(dbgs() << "Live through BB#" << Number << '\n'); - if (RegIn && RegOut) { - Intf.moveToBlock(Number); - if (!Intf.hasInterference()) { - SE->useIntv(Indexes->getMBBStartIdx(Number), - Indexes->getMBBEndIdx(Number)); - continue; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); } + if (!IntvIn && !IntvOut) + continue; + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); } - MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (RegIn) - SE->leaveIntvAtTop(*MBB); - if (RegOut) - SE->enterIntvAtEnd(*MBB); } ++NumGlobalSplits; SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs()); ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); @@ -1218,18 +1072,18 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. if (IntvMap[i] == 0) { - setStage(Reg, RS_Global); + setStage(Reg, RS_Spill); continue; } - // Main interval. Allow repeated splitting as long as the number of live + // Global intervals. Allow repeated splitting as long as the number of live // blocks is strictly decreasing. - if (IntvMap[i] == MainIntv) { + if (IntvMap[i] < NumGlobalIntvs) { if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. - setStage(Reg, RS_Global); + setStage(Reg, RS_Split2); } continue; } @@ -1244,14 +1098,44 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - float BestCost = Hysteresis * calcSpillCost(); - DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); - const unsigned NoCand = ~0u; - unsigned BestCand = NoCand; unsigned NumCands = 0; + unsigned BestCand = NoCand; + float BestCost; + SmallVector UsedCands; + + // Check if we can split this live range around a compact region. + bool HasCompact = calcCompactRegion(GlobalCand.front()); + if (HasCompact) { + // Yes, keep GlobalCand[0] as the compact region candidate. + NumCands = 1; + BestCost = HUGE_VALF; + } else { + // No benefit from the compact region, our fallback will be per-block + // splitting. Make sure we find a solution that is cheaper than spilling. + BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + } Order.rewind(); while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). + if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand || !GlobalCand[i].PhysReg) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; + } + if (GlobalCand.size() <= NumCands) GlobalCand.resize(NumCands+1); GlobalSplitCandidate &Cand = GlobalCand[NumCands]; @@ -1299,10 +1183,148 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, ++NumCands; } - if (BestCand == NoCand) + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + // Prepare split editor. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + + // Assign all edge bundles to the preferred candidate, or NoCand. + BundleCand.assign(Bundles->getNumBundles(), NoCand); + + // Assign bundles for the best candidate region. + if (BestCand != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[BestCand]; + if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { + UsedCands.push_back(BestCand); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + // Assign bundles for the compact region. + if (HasCompact) { + GlobalSplitCandidate &Cand = GlobalCand.front(); + assert(!Cand.PhysReg && "Compact region has no physreg"); + if (unsigned B = Cand.getBundles(BundleCand, 0)) { + UsedCands.push_back(0); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " + << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + splitAroundRegion(LREdit, UsedCands); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Block Splitting +//===----------------------------------------------------------------------===// + +/// tryBlockSplit - Split a global live range around every block with uses. This +/// creates a lot of local live ranges, that will be split by tryLocalSplit if +/// they don't allocate. +unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); + unsigned Reg = VirtReg.reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + } + // No blocks were split. + if (LREdit.empty()) + return 0; + + // We did split for some blocks. + SmallVector IntvMap; + SE->finish(&IntvMap); + + // Tell LiveDebugVariables about the new ranges. + DebugVars->splitRegister(Reg, LREdit.regs()); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Sort out the new intervals created by splitting. The remainder interval + // goes straight to spilling, the new local ranges get to stay RS_New. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &LI = *LREdit.get(i); + if (getStage(LI) == RS_New && IntvMap[i] == 0) + setStage(LI, RS_Spill); + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Instruction Splitting +//===----------------------------------------------------------------------===// + +/// tryInstructionSplit - Split a live range around individual instructions. +/// This is normally not worthwhile since the spiller is doing essentially the +/// same thing. However, when the live range is in a constrained register +/// class, it may help to insert copies such that parts of the live range can +/// be moved to a larger register class. +/// +/// This is similar to spilling to a larger register class. +unsigned +RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + // There is no point to this if there are no larger sub-classes. + if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + return 0; + + // Always enable split spill mode, since we're effectively spilling to a + // register. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitEditor::SM_Size); + + ArrayRef Uses = SA->getUseSlots(); + if (Uses.size() <= 1) return 0; - splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); + DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + + // Split around every non-copy instruction. + for (unsigned i = 0; i != Uses.size(); ++i) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + if (MI->isFullCopy()) { + DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + continue; + } + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SE->useIntv(SegStart, SegStop); + } + + if (LREdit.empty()) { + DEBUG(dbgs() << "All uses were copies.\n"); + return 0; + } + + SmallVector IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Assign all new registers to RS_Spill. This was the last chance. + setStage(LREdit.begin(), LREdit.end(), RS_Spill); return 0; } @@ -1321,29 +1343,32 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl &GapWeight) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); - const SmallVectorImpl &Uses = SA->UseSlots; + ArrayRef Uses = SA->getUseSlots(); const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. - SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; - SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + SlotIndex StartIdx = + BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; + SlotIndex StopIdx = + BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(const_cast(SA->getParent()), *AI) - .checkInterference()) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast(SA->getParent()), *Units) + .checkInterference()) continue; - // We know that VirtReg is a continuous interval from FirstUse to LastUse, - // so we don't need InterferenceQuery. + // We know that VirtReg is a continuous interval from FirstInstr to + // LastInstr, so we don't need InterferenceQuery. // // Interference that overlaps an instruction is counted in both gaps // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1363,6 +1388,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; } } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveInterval &LI = LIS->getRegUnit(*Units); + LiveInterval::const_iterator I = LI.find(StartIdx); + LiveInterval::const_iterator E = LI.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = HUGE_VALF; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } } /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only @@ -1377,10 +1426,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // while only covering a single block - A phi-def can use undef values from // predecessors, and the block could be a single-block loop. // We don't bother doing anything clever about such a case, we simply assume - // that the interval is continuous from FirstUse to LastUse. We should make - // sure that we don't do anything illegal to such an interval, though. + // that the interval is continuous from FirstInstr to LastInstr. We should + // make sure that we don't do anything illegal to such an interval, though. - const SmallVectorImpl &Uses = SA->UseSlots; + ArrayRef Uses = SA->getUseSlots(); if (Uses.size() <= 2) return 0; const unsigned NumGaps = Uses.size()-1; @@ -1388,10 +1437,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG({ dbgs() << "tryLocalSplit: "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) - dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << ' ' << Uses[i]; dbgs() << '\n'; }); + // If VirtReg is live across any register mask operands, compute a list of + // gaps with register masks. + SmallVector RegMaskGaps; + if (Matrix->checkRegMaskInterference(VirtReg)) { + // Get regmask slots for the whole block. + ArrayRef RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); + DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + // Constrain to VirtReg's live range. + unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), + Uses.front().getRegSlot()) - RMS.begin(); + unsigned re = RMS.size(); + for (unsigned i = 0; i != NumGaps && ri != re; ++i) { + // Look for Uses[i] <= RMS <= Uses[i+1]. + assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); + if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + continue; + // Skip a regmask on the same instruction as the last use. It doesn't + // overlap the live range. + if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + break; + DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + RegMaskGaps.push_back(i); + // Advance ri to the next gap. A regmask on one of the uses counts in + // both gaps. + while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) + ++ri; + } + DEBUG(dbgs() << '\n'); + } + // Since we allow local split results to be split again, there is a risk of // creating infinite loops. It is tempting to require that the new live // ranges have less instructions than the original. That would guarantee @@ -1400,17 +1479,17 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // // Instead we use these rules: // - // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the + // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the // noop split, of course). - // 2. Require progress be made for ranges with getStage() >= RS_Local. All + // 2. Require progress be made for ranges with getStage() == RS_Split2. All // the new ranges must have fewer instructions than before the split. - // 3. New ranges with the same number of instructions are marked RS_Local, + // 3. New ranges with the same number of instructions are marked RS_Split2, // smaller ranges are marked RS_New. // // These rules allow a 3 -> 2+3 split once, which we need. They also prevent // excessive splitting and infinite loops. // - bool ProgressRequired = getStage(VirtReg) >= RS_Local; + bool ProgressRequired = getStage(VirtReg) >= RS_Split2; // Best split candidate. unsigned BestBefore = NumGaps; @@ -1426,6 +1505,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. calcGapWeights(PhysReg, GapWeight); + // Remove any gaps with regmask clobbers. + if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) + for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) + GapWeight[RegMaskGaps[i]] = HUGE_VALF; + // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1517,7 +1601,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1529,7 +1613,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); // If the new range has the same number of instructions as before, mark it as - // RS_Local so the next split will be forced to make progress. Otherwise, + // RS_Split2 so the next split will be forced to make progress. Otherwise, // leave the new intervals as RS_New so they can compete. bool LiveBefore = BestBefore != 0 || BI.LiveIn; bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; @@ -1539,7 +1623,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - setStage(*LREdit.get(i), RS_Local); + setStage(*LREdit.get(i), RS_Split2); DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); } DEBUG(dbgs() << '\n'); @@ -1558,20 +1642,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl&NewVRegs) { + // Ranges must be Split2 or less. + if (getStage(VirtReg) >= RS_Spill) + return 0; + // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); SA->analyze(&VirtReg); - return tryLocalSplit(VirtReg, Order, NewVRegs); + unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + return tryInstructionSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); - // Don't iterate global splitting. - // Move straight to spilling if this range was produced by a global split. - if (getStage(VirtReg) >= RS_Global) - return 0; - SA->analyze(&VirtReg); // FIXME: SplitAnalysis may repair broken live ranges coming from the @@ -1580,29 +1666,22 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // an assertion when the coalescer is fixed. if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } - // First try to split around a region spanning multiple blocks. - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) - return PhysReg; - - // Then isolate blocks with multiple uses. - SplitAnalysis::BlockPtrSet Blocks; - if (SA->getMultiUseBlocks(Blocks)) { - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - SE->splitSingleBlocks(Blocks); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); - if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); + // First try to split around a region spanning multiple blocks. RS_Split2 + // ranges already made dubious progress with region splitting, so they go + // straight to single block splitting. + if (getStage(VirtReg) < RS_Split2) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; } - // Don't assign any physregs. - return 0; + // Then isolate blocks. + return tryBlockSplit(VirtReg, Order, NewVRegs); } @@ -1622,9 +1701,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary - // queue. The RS_Second ranges already failed to do this, and they should not + // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. - if (Stage != RS_Second) + if (Stage != RS_Split) if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) return PhysReg; @@ -1633,8 +1712,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - if (Stage == RS_First) { - setStage(VirtReg, RS_Second); + if (Stage < RS_Split) { + setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; @@ -1642,7 +1721,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. - if (Stage >= RS_Spill || !VirtReg.isSpillable()) + if (Stage >= RS_Done || !VirtReg.isSpillable()) return ~0u; // Try splitting VirtReg or interferences. @@ -1652,9 +1731,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); if (VerifyEnabled) MF->verify(this, "After spilling"); @@ -1666,19 +1745,19 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); - RegAllocBase::init(getAnalysis(), getAnalysis()); + RegAllocBase::init(getAnalysis(), + getAnalysis(), + getAnalysis()); Indexes = &getAnalysis(); DomTree = &getAnalysis(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis(); - LoopRanges = &getAnalysis(); Bundles = &getAnalysis(); SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); @@ -1688,23 +1767,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); + GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); - addMBBLiveIns(MF); - LIS->addKillFlags(); - - // Run rewriter - { - NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); - VRM->rewrite(Indexes); - } - - // Write out new DBG_VALUE instructions. - DebugVars->emitDebugValues(VRM); - - // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); - return true; }