X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=feec3d4f7c317f65ffe38dba0b9c7d58312d445c;hb=aba6559370c3d453588103fb667ffa3b11b76652;hp=147e8035340fc60d19909f15eaea03d3f8082f82;hpb=49743b18f50ac0f7e065f4754a26965d4db388de;p=oota-llvm.git

diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 147e8035340..feec3d4f7c3 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -22,7 +22,6 @@
 #include "SpillPlacement.h"
 #include "SplitKit.h"
 #include "VirtRegMap.h"
-#include "RegisterCoalescer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Function.h"
@@ -38,6 +37,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -51,6 +51,15 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges");
 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
 STATISTIC(NumEvicted,      "Number of interferences evicted");
 
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+  cl::desc("Spill mode for splitting live ranges"),
+  cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+             clEnumValN(SplitEditor::SM_Size,  "size",  "Optimize for size"),
+             clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+             clEnumValEnd),
+  cl::init(SplitEditor::SM_Partition));
+
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
@@ -159,6 +168,19 @@ class RAGreedy : public MachineFunctionPass,
     }
   };
 
+  // Register mask interference. The current VirtReg is checked for register
+  // mask interference on entry to selectOrSplit(). If there is no
+  // interference, UsableRegs is left empty. If there is interference,
+  // UsableRegs has a bit mask of registers that can be used without register
+  // mask interference.
+  BitVector UsableRegs;
+
+  /// clobberedByRegMask - Returns true if PhysReg is not directly usable
+  /// because of register mask clobbers.
+  bool clobberedByRegMask(unsigned PhysReg) const {
+    return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
+  }
+
   // splitting state.
   std::auto_ptr<SplitAnalysis> SA;
   std::auto_ptr<SplitEditor> SE;
@@ -171,17 +193,38 @@ class RAGreedy : public MachineFunctionPass,
 
   /// Global live range splitting candidate info.
   struct GlobalSplitCandidate {
+    // Register intended for assignment, or 0.
     unsigned PhysReg;
+
+    // SplitKit interval index for this candidate.
+    unsigned IntvIdx;
+
+    // Interference for PhysReg.
     InterferenceCache::Cursor Intf;
+
+    // Bundles where this candidate should be live.
     BitVector LiveBundles;
     SmallVector<unsigned, 8> ActiveBlocks;
 
     void reset(InterferenceCache &Cache, unsigned Reg) {
       PhysReg = Reg;
+      IntvIdx = 0;
       Intf.setPhysReg(Cache, Reg);
       LiveBundles.clear();
       ActiveBlocks.clear();
     }
+
+    // Set B[i] = C for every live bundle where B[i] was NoCand.
+    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+      unsigned Count = 0;
+      for (int i = LiveBundles.find_first(); i >= 0;
+           i = LiveBundles.find_next(i))
+        if (B[i] == NoCand) {
+          B[i] = C;
+          Count++;
+        }
+      return Count;
+    }
   };
 
   /// Candidate info for each PhysReg in AllocationOrder.
@@ -189,6 +232,12 @@ class RAGreedy : public MachineFunctionPass,
   /// class.
   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
+  enum { NoCand = ~0u };
+
+  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+  /// NoCand which indicates the stack interval.
+  SmallVector<unsigned, 32> BundleCand;
+
 public:
   RAGreedy();
 
@@ -212,7 +261,6 @@ public:
   static char ID;
 
 private:
-  void LRE_WillEraseInstruction(MachineInstr*);
   bool LRE_CanEraseVirtReg(unsigned);
   void LRE_WillShrinkVirtReg(unsigned);
   void LRE_DidCloneVirtReg(unsigned, unsigned);
@@ -223,8 +271,7 @@ private:
   void growRegion(GlobalSplitCandidate &Cand);
   float calcGlobalSplitCost(GlobalSplitCandidate&);
   bool calcCompactRegion(GlobalSplitCandidate&);
-  void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
-                         SmallVectorImpl<LiveInterval*>&);
+  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
@@ -237,6 +284,8 @@ private:
                     SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
+  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+                         SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -271,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
-  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
   initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -291,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveDebugVariables>();
   AU.addPreserved<LiveDebugVariables>();
-  if (StrongPHIElim)
-    AU.addRequiredID(StrongPHIEliminationID);
-  AU.addRequiredTransitive<RegisterCoalescer>();
   AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
@@ -313,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
 // LiveRangeEdit delegate methods
 //===----------------------------------------------------------------------===//
 
-void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
-  // LRE itself will remove from SlotIndexes and parent basic block.
-  VRM->RemoveMachineInstrFromMaps(MI);
-}
-
 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
   if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
     unassign(LIS->getInterval(VirtReg), PhysReg);
@@ -340,9 +381,15 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
 }
 
 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+  // Cloning a register we haven't even heard about yet? Just ignore it.
+  if (!ExtraRegInfo.inBounds(Old))
+    return;
+
   // LRE may clone a virtual register because dead code elimination causes it to
-  // be split into connected components. Ensure that the new register gets the
-  // same stage as the parent.
+  // be split into connected components. The new components are much smaller
+  // than the original, so they should get a new chance at being assigned.
+  ExtraRegInfo[Old].Stage = RS_Assign;
   ExtraRegInfo.grow(New);
   ExtraRegInfo[New] = ExtraRegInfo[Old];
 }
@@ -367,14 +414,13 @@ void RAGreedy::enqueue(LiveInterval *LI) {
   if (ExtraRegInfo[Reg].Stage == RS_New)
     ExtraRegInfo[Reg].Stage = RS_Assign;
 
-  if (ExtraRegInfo[Reg].Stage == RS_Split)
+  if (ExtraRegInfo[Reg].Stage == RS_Split) {
     // Unsplit ranges that couldn't be allocated immediately are deferred until
-    // everything else has been allocated. Long ranges are allocated last so
-    // they are split against realistic interference.
-    Prio = (1u << 31) - Size;
-  else {
-    // Everything else is allocated in long->short order. Long ranges that don't
-    // fit should be spilled ASAP so they don't create interference.
+    // everything else has been allocated.
+    Prio = Size;
+  } else {
+    // Everything is allocated in long->short order. Long ranges that don't fit
+    // should be spilled (or split) ASAP so they don't create interference.
     Prio = (1u << 31) + Size;
 
     // Boost ranges that have a physical register hint.
@@ -404,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                              SmallVectorImpl<LiveInterval*> &NewVRegs) {
   Order.rewind();
   unsigned PhysReg;
-  while ((PhysReg = Order.next()))
+  while ((PhysReg = Order.next())) {
+    if (clobberedByRegMask(PhysReg))
+      continue;
     if (!checkPhysRegInterference(VirtReg, PhysReg))
       break;
+  }
   if (!PhysReg || Order.isHint(PhysReg))
     return PhysReg;
 
@@ -415,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   // If we missed a simple hint, try to cheaply evict interference from the
   // preferred register.
   if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
-    if (Order.isHint(Hint)) {
+    if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
       DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
       EvictionCost MaxCost(1);
       if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -490,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
     Cascade = NextCascade;
 
   EvictionCost Cost;
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+  for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
     // If there are 10 or more interferences, chances are one is heavier.
     if (Q.collectInterferingVRegs(10) >= 10)
@@ -548,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
                << " interference: Cascade " << Cascade << '\n');
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+  for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
     assert(Q.seenAllInterferences() && "Didn't check all interferences.");
     for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
@@ -587,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
 
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
+    if (clobberedByRegMask(PhysReg))
+      continue;
     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
       continue;
     // The first use of a callee-saved register in a function has cost 1.
@@ -642,6 +693,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     Intf.moveToBlock(BC.Number);
     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
     BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BC.ChangesValue = BI.FirstDef;
 
     if (!Intf.hasInterference())
       continue;
@@ -653,9 +705,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     if (BI.LiveIn) {
       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
         BC.Entry = SpillPlacement::MustSpill, ++Ins;
-      else if (Intf.first() < BI.FirstUse)
+      else if (Intf.first() < BI.FirstInstr)
         BC.Entry = SpillPlacement::PrefSpill, ++Ins;
-      else if (Intf.first() < BI.LastUse)
+      else if (Intf.first() < BI.LastInstr)
         ++Ins;
     }
 
@@ -663,9 +715,9 @@
     if (BI.LiveOut) {
       if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
         BC.Exit = SpillPlacement::MustSpill, ++Ins;
-      else if (Intf.last() > BI.LastUse)
+      else if (Intf.last() > BI.LastInstr)
         BC.Exit = SpillPlacement::PrefSpill, ++Ins;
-      else if (Intf.last() > BI.FirstUse)
+      else if (Intf.last() > BI.FirstInstr)
         ++Ins;
     }
 
@@ -771,7 +823,9 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
     if (Cand.PhysReg)
       addThroughConstraints(Cand.Intf, NewBlocks);
     else
-      SpillPlacer->addPrefSpill(NewBlocks);
+      // Provide a strong negative bias on through blocks to prevent unwanted
+      // liveness on loop backedges.
+      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
     AddedTo = ActiveBlocks.size();
 
     // Perhaps iterating can enable more bundles?
@@ -829,7 +883,6 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
 /// SA around all use blocks instead of forming bundle regions.
 float RAGreedy::calcSpillCost() {
   float Cost = 0;
-  const LiveInterval &LI = SA->getParent();
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -838,16 +891,8 @@ float RAGreedy::calcSpillCost() {
       Cost += SpillPlacer->getBlockFrequency(Number);
 
     // Unless the value is redefined in the block.
-    if (BI.LiveIn && BI.LiveOut) {
-      SlotIndex Start, Stop;
-      tie(Start, Stop) = Indexes->getMBBRange(Number);
-      LiveInterval::const_iterator I = LI.find(Start);
-      assert(I != LI.end() && "Expected live-in value");
-      // Is there a different live-out value? If so, we need an extra spill
-      // instruction.
-      if (I->end < Stop)
-        Cost += SpillPlacer->getBlockFrequency(Number);
-    }
+    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+      Cost += SpillPlacer->getBlockFrequency(Number);
   }
   return Cost;
 }
@@ -894,81 +939,115 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
   return GlobalCost;
 }
 
-/// splitAroundRegion - Split VirtReg around the region determined by
-/// LiveBundles. Make an effort to avoid interference from PhysReg.
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
 ///
-/// The 'register' interval is going to contain as many uses as possible while
-/// avoiding interference. The 'stack' interval is the complement constructed by
-/// SplitEditor. It will contain the rest.
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
 ///
-void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
-                                 GlobalSplitCandidate &Cand,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  const BitVector &LiveBundles = Cand.LiveBundles;
-
-  DEBUG({
-    dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
-           << " with bundles";
-    for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
-      dbgs() << " EB#" << i;
-    dbgs() << ".\n";
-  });
-
-  InterferenceCache::Cursor &Intf = Cand.Intf;
-  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
-  SE->reset(LREdit);
-
-  // Create the main cross-block interval.
-  const unsigned MainIntv = SE->openIntv();
+/// @param LREdit    The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+///                  must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+                                 ArrayRef<unsigned> UsedCands) {
+  // These are the intervals created for new global ranges. We may create more
+  // intervals for local ranges.
+  const unsigned NumGlobalIntvs = LREdit.size();
+  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+  assert(NumGlobalIntvs && "No global intervals configured");
+
+  // Isolate even single instructions when dealing with a proper sub-class.
+  // That guarantees register class inflation for the stack interval because it
+  // is all copies.
+  unsigned Reg = SA->getParent().reg;
+  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
 
   // First handle all the blocks with uses.
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
-    bool RegIn  = BI.LiveIn &&
-                  LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
-    bool RegOut = BI.LiveOut &&
-                  LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+    unsigned Number = BI.MBB->getNumber();
+    unsigned IntvIn = 0, IntvOut = 0;
+    SlotIndex IntfIn, IntfOut;
+    if (BI.LiveIn) {
+      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+      if (CandIn != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+        IntvIn = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfIn = Cand.Intf.first();
+      }
+    }
+    if (BI.LiveOut) {
+      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+      if (CandOut != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+        IntvOut = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfOut = Cand.Intf.last();
+      }
+    }
 
     // Create separate intervals for isolated blocks with multiple uses.
-    if (!RegIn && !RegOut) {
+    if (!IntvIn && !IntvOut) {
       DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
-      if (!BI.isOneInstr()) {
+      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
         SE->splitSingleBlock(BI);
-        SE->selectIntv(MainIntv);
-      }
       continue;
     }
 
-    Intf.moveToBlock(BI.MBB->getNumber());
-
-    if (RegIn && RegOut)
-      SE->splitLiveThroughBlock(BI.MBB->getNumber(),
-                                MainIntv, Intf.first(),
-                                MainIntv, Intf.last());
-    else if (RegIn)
-      SE->splitRegInBlock(BI, MainIntv, Intf.first());
+    if (IntvIn && IntvOut)
+      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+    else if (IntvIn)
+      SE->splitRegInBlock(BI, IntvIn, IntfIn);
     else
-      SE->splitRegOutBlock(BI, MainIntv, Intf.last());
+      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
   }
 
-  // Handle live-through blocks.
-  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
-    unsigned Number = Cand.ActiveBlocks[i];
-    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
-    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
-    if (!RegIn && !RegOut)
-      continue;
-    Intf.moveToBlock(Number);
-    SE->splitLiveThroughBlock(Number, RegIn  ? MainIntv : 0, Intf.first(),
-                                      RegOut ? MainIntv : 0, Intf.last());
+  // Handle live-through blocks. The relevant live-through blocks are stored in
+  // the ActiveBlocks list with each candidate. We need to filter out
+  // duplicates.
+  BitVector Todo = SA->getThroughBlocks();
+  for (unsigned c = 0; c != UsedCands.size(); ++c) {
+    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+      unsigned Number = Blocks[i];
+      if (!Todo.test(Number))
+        continue;
+      Todo.reset(Number);
+
+      unsigned IntvIn = 0, IntvOut = 0;
+      SlotIndex IntfIn, IntfOut;
+
+      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+      if (CandIn != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+        IntvIn = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfIn = Cand.Intf.first();
+      }
+
+      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+      if (CandOut != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+        IntvOut = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfOut = Cand.Intf.last();
+      }
+      if (!IntvIn && !IntvOut)
+        continue;
+      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+    }
   }
 
   ++NumGlobalSplits;
 
   SmallVector<unsigned, 8> IntvMap;
   SE->finish(&IntvMap);
-  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  DebugVars->splitRegister(Reg, LREdit.regs());
 
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
   unsigned OrigBlocks = SA->getNumLiveBlocks();
@@ -992,9 +1071,9 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
       continue;
     }
 
-    // Main interval. Allow repeated splitting as long as the number of live
-    // blocks is strictly decreasing. Otherwise force per-block splitting.
-    if (IntvMap[i] == MainIntv) {
+    // Global intervals. Allow repeated splitting as long as the number of live
+    // blocks is strictly decreasing.
+    if (IntvMap[i] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
         DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                      << " blocks as original.\n");
@@ -1014,11 +1093,23 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  float BestCost = Hysteresis * calcSpillCost();
-  DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
-  const unsigned NoCand = ~0u;
-  unsigned BestCand = NoCand;
   unsigned NumCands = 0;
+  unsigned BestCand = NoCand;
+  float BestCost;
+  SmallVector<unsigned, 8> UsedCands;
+
+  // Check if we can split this live range around a compact region.
+  bool HasCompact = calcCompactRegion(GlobalCand.front());
+  if (HasCompact) {
+    // Yes, keep GlobalCand[0] as the compact region candidate.
+    NumCands = 1;
+    BestCost = HUGE_VALF;
+  } else {
+    // No benefit from the compact region, our fallback will be per-block
+    // splitting. Make sure we find a solution that is cheaper than spilling.
+    BestCost = Hysteresis * calcSpillCost();
+    DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+  }
 
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
@@ -1028,7 +1119,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       unsigned WorstCount = ~0u;
       unsigned Worst = 0;
       for (unsigned i = 0; i != NumCands; ++i) {
-        if (i == BestCand)
+        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
         unsigned Count = GlobalCand[i].LiveBundles.count();
         if (Count < WorstCount)
@@ -1036,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       }
       --NumCands;
       GlobalCand[Worst] = GlobalCand[NumCands];
+      if (BestCand == NumCands)
+        BestCand = Worst;
     }
 
     if (GlobalCand.size() <= NumCands)
@@ -1085,14 +1178,93 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     ++NumCands;
   }
 
-  if (BestCand == NoCand)
+  // No solutions found, fall back to single block splitting.
+  if (!HasCompact && BestCand == NoCand)
     return 0;
 
-  splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
+  // Prepare split editor.
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+  SE->reset(LREdit, SplitSpillMode);
+
+  // Assign all edge bundles to the preferred candidate, or NoCand.
+  BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+  // Assign bundles for the best candidate region.
+  if (BestCand != NoCand) {
+    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+      UsedCands.push_back(BestCand);
+      Cand.IntvIdx = SE->openIntv();
+      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+      (void)B;
+    }
+  }
+
+  // Assign bundles for the compact region.
+  if (HasCompact) {
+    GlobalSplitCandidate &Cand = GlobalCand.front();
+    assert(!Cand.PhysReg && "Compact region has no physreg");
+    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+      UsedCands.push_back(0);
+      Cand.IntvIdx = SE->openIntv();
+      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+                   << Cand.IntvIdx << ".\n");
+      (void)B;
+    }
+  }
+
+  splitAroundRegion(LREdit, UsedCands);
   return 0;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                            Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses.
+/// This creates a lot of local live ranges that will be split by tryLocalSplit
+/// if they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+  unsigned Reg = VirtReg.reg;
+  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+  SE->reset(LREdit, SplitSpillMode);
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+      SE->splitSingleBlock(BI);
+  }
+  // No blocks were split.
+  if (LREdit.empty())
+    return 0;
+
+  // We did split for some blocks.
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+
+  // Tell LiveDebugVariables about the new ranges.
+  DebugVars->splitRegister(Reg, LREdit.regs());
+
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+  // Sort out the new intervals created by splitting. The remainder interval
+  // goes straight to spilling, the new local ranges get to stay RS_New.
+  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+    LiveInterval &LI = *LREdit.get(i);
+    if (getStage(LI) == RS_New && IntvMap[i] == 0)
+      setStage(LI, RS_Spill);
+  }
+
+  if (VerifyEnabled)
+    MF->verify(this, "After splitting live range around basic blocks");
+  return 0;
+}
+
 //===----------------------------------------------------------------------===//
 //                             Local Splitting
 //===----------------------------------------------------------------------===//
@@ -1107,29 +1279,31 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
                               SmallVectorImpl<float> &GapWeight) {
   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
-  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
   const unsigned NumGaps = Uses.size()-1;
 
   // Start and end points for the interference check.
-  SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
-  SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+  SlotIndex StartIdx =
+    BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+  SlotIndex StopIdx =
+    BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
 
   GapWeight.assign(NumGaps, 0.0f);
 
   // Add interference from each overlapping register.
-  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+  for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
    if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
           .checkInterference())
       continue;
 
-    // We know that VirtReg is a continuous interval from FirstUse to LastUse,
-    // so we don't need InterferenceQuery.
+    // We know that VirtReg is a continuous interval from FirstInstr to
+    // LastInstr, so we don't need InterferenceQuery.
     //
     // Interference that overlaps an instruction is counted in both gaps
     // surrounding the instruction. The exception is interference before
     // StartIdx and after StopIdx.
     //
-    LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+    LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
       // Skip the gaps before IntI.
       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1163,10 +1337,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   // while only covering a single block - A phi-def can use undef values from
   // predecessors, and the block could be a single-block loop.
   // We don't bother doing anything clever about such a case, we simply assume
-  // that the interval is continuous from FirstUse to LastUse. We should make
-  // sure that we don't do anything illegal to such an interval, though.
+  // that the interval is continuous from FirstInstr to LastInstr. We should
+  // make sure that we don't do anything illegal to such an interval, though.
 
-  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
   if (Uses.size() <= 2)
     return 0;
   const unsigned NumGaps = Uses.size()-1;
 
@@ -1174,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   DEBUG({
     dbgs() << "tryLocalSplit: ";
     for (unsigned i = 0, e = Uses.size(); i != e; ++i)
-      dbgs() << ' ' << SA->UseSlots[i];
+      dbgs() << ' ' << Uses[i];
     dbgs() << '\n';
   });
 
+  // If VirtReg is live across any register mask operands, compute a list of
+  // gaps with register masks.
+  SmallVector<unsigned, 8> RegMaskGaps;
+  if (!UsableRegs.empty()) {
+    // Get regmask slots for the whole block.
+    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+    // Constrain to VirtReg's live range.
+    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+                                   Uses.front().getRegSlot()) - RMS.begin();
+    unsigned re = RMS.size();
+    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+      // Look for Uses[i] <= RMS <= Uses[i+1].
+      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+        continue;
+      // Skip a regmask on the same instruction as the last use. It doesn't
+      // overlap the live range.
+      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+        break;
+      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+      RegMaskGaps.push_back(i);
+      // Advance ri to the next gap. A regmask on one of the uses counts in
+      // both gaps.
+      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+        ++ri;
+    }
+    DEBUG(dbgs() << '\n');
+  }
+
   // Since we allow local split results to be split again, there is a risk of
   // creating infinite loops. It is tempting to require that the new live
   // ranges have less instructions than the original. That would guarantee
@@ -1212,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
     calcGapWeights(PhysReg, GapWeight);
 
+    // Remove any gaps with regmask clobbers.
+    if (clobberedByRegMask(PhysReg))
+      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+        GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
     // Try to find the best sequence of gaps to close.
     // The new spill weight must be larger than any gap interference.
 
@@ -1344,6 +1553,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // Ranges must be Split2 or less.
+  if (getStage(VirtReg) >= RS_Spill)
+    return 0;
+
   // Local intervals are handled separately.
   if (LIS->intervalIsInOneMBB(VirtReg)) {
     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
@@ -1353,10 +1566,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
 
   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
 
-  // Ranges must be Split2 or less.
-  if (getStage(VirtReg) >= RS_Spill)
-    return 0;
-
   SA->analyze(&VirtReg);
 
   // FIXME: SplitAnalysis may repair broken live ranges coming from the
@@ -1379,19 +1588,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
     return PhysReg;
   }
 
-  // Then isolate blocks with multiple uses.
-  SplitAnalysis::BlockPtrSet Blocks;
-  if (SA->getMultiUseBlocks(Blocks)) {
-    LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
-    SE->reset(LREdit);
-    SE->splitSingleBlocks(Blocks);
-    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
-    if (VerifyEnabled)
-      MF->verify(this, "After splitting live range around basic blocks");
-  }
-
-  // Don't assign any physregs.
-  return 0;
+  // Then isolate blocks.
+  return tryBlockSplit(VirtReg, Order, NewVRegs);
 }
 
 
@@ -1401,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
 
 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // Check if VirtReg is live across any calls.
+  UsableRegs.clear();
+  if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
+    DEBUG(dbgs() << "Live across regmasks.\n");
+
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
   if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1476,7 +1679,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   ExtraRegInfo.clear();
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
   NextCascade = 1;
-  IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+  IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
+  GlobalCand.resize(32);  // This will grow as needed.
 
   allocatePhysRegs();
   addMBBLiveIns(MF);
@@ -1489,9 +1693,15 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   }
 
   // Write out new DBG_VALUE instructions.
-  DebugVars->emitDebugValues(VRM);
+  {
+    NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled);
+    DebugVars->emitDebugValues(VRM);
+  }
 
-  // The pass output is in VirtRegMap. Release all the transient data.
+  // All machine operands and other references to virtual registers have been
+  // replaced. Remove the virtual registers and release all the transient data.
+  VRM->clearAllVirt();
+  MRI->clearVirtRegs();
   releaseMemory();
 
   return true;
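
Notes on the patch follow, with small standalone sketches. The register mask handling threaded through the patch uses one convention: selectOrSplit() fills UsableRegs once per virtual register, and an empty UsableRegs means the range crosses no register mask at all. A minimal sketch of that convention in plain C++ (std::vector<bool> stands in for llvm::BitVector; RegMaskFilter is a hypothetical name, not part of the pass):

    #include <cassert>
    #include <vector>

    // Sketch of the UsableRegs convention: an empty vector means the live
    // range crosses no register mask, so every physical register passes.
    struct RegMaskFilter {
      std::vector<bool> UsableRegs; // indexed by physical register number

      // Mirrors the shape of RAGreedy::clobberedByRegMask(): a register is
      // clobbered only if some mask was seen and its bit is not set.
      bool clobberedByRegMask(unsigned PhysReg) const {
        return !UsableRegs.empty() && !UsableRegs[PhysReg];
      }
    };

    int main() {
      RegMaskFilter F;
      assert(!F.clobberedByRegMask(5)); // no regmask interference recorded

      F.UsableRegs.assign(8, false);
      F.UsableRegs[3] = true;           // only physreg 3 survives the call
      assert(F.clobberedByRegMask(5));
      assert(!F.clobberedByRegMask(3));
      return 0;
    }

The empty-means-unconstrained encoding lets the common case (no regmask in range) skip both the allocation and the per-register test.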
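GlobalSplitCandidate::getBundles() claims, for one candidate, every live bundle still marked NoCand, and tryRegionSplit() uses the returned count to decide whether to open a SplitKit interval for that candidate. A standalone sketch of the claim-and-count pattern (a std::set of bundle numbers stands in for the LiveBundles BitVector; all names here are illustrative):

    #include <iostream>
    #include <set>
    #include <vector>

    static const unsigned NoCand = ~0u;

    // Mirrors getBundles(): set B[i] = C for every live bundle whose entry is
    // still NoCand, and return how many bundles were claimed.
    unsigned getBundles(const std::set<unsigned> &LiveBundles,
                        std::vector<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (unsigned i : LiveBundles)
        if (B[i] == NoCand) {
          B[i] = C;
          ++Count;
        }
      return Count;
    }

    int main() {
      std::vector<unsigned> BundleCand(6, NoCand);
      // Candidate 1 claims bundles {0, 2, 3}; candidate 0 (the compact
      // region) then only gets the leftovers of {2, 4}.
      unsigned B1 = getBundles({0, 2, 3}, BundleCand, 1);
      unsigned B0 = getBundles({2, 4}, BundleCand, 0);
      std::cout << "cand 1 claimed " << B1
                << ", cand 0 claimed " << B0 << '\n';
      return 0; // prints: cand 1 claimed 3, cand 0 claimed 1
    }

Because earlier claims win, the order of getBundles() calls in tryRegionSplit() gives the best candidate priority over the compact region.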
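The enqueue() change reworks the allocation order: deferred RS_Split ranges now get Prio = Size, while everything else keeps Prio = (1u << 31) + Size, so fresh ranges are processed before deferred ones and, within each class, longer ranges come first. A sketch of that ordering with std::priority_queue (the enqueue lambda below is illustrative, not the pass's API):

    #include <cstdio>
    #include <queue>
    #include <utility>

    // Sketch of the priority scheme after this patch: deferred (RS_Split)
    // ranges use Prio = Size, everything else Prio = (1u << 31) + Size, so
    // fresh ranges beat deferred ones and longer ranges come out first.
    int main() {
      std::priority_queue<std::pair<unsigned, unsigned>> Queue; // (Prio, vreg)

      auto enqueue = [&](unsigned VReg, unsigned Size, bool Deferred) {
        unsigned Prio = Deferred ? Size : (1u << 31) + Size;
        Queue.push({Prio, VReg});
      };

      enqueue(1, 100, false); // long fresh range
      enqueue(2, 10, false);  // short fresh range
      enqueue(3, 500, true);  // deferred range, allocated after the rest

      while (!Queue.empty()) {
        printf("vreg %u\n", Queue.top().second); // prints 1, 2, 3
        Queue.pop();
      }
      return 0;
    }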
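The RegMaskGaps loop added to tryLocalSplit() walks two sorted sequences at once, recording each gap between consecutive use slots that overlaps a register mask slot. A simplified standalone sketch of the same two-pointer walk (plain ints stand in for SlotIndexes, and the same-instruction subtleties of SlotIndex are ignored):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Sketch of the RegMaskGaps computation: with use slots and regmask slots
    // both sorted, record every gap [Uses[i], Uses[i+1]] containing a mask.
    int main() {
      std::vector<int> Uses = {10, 20, 30, 40}; // sorted use slots
      std::vector<int> RMS  = {5, 25, 26, 35};  // sorted regmask slots
      unsigned NumGaps = Uses.size() - 1;

      std::vector<unsigned> RegMaskGaps;
      // Skip masks before the live range, as the patch does with lower_bound.
      size_t ri = std::lower_bound(RMS.begin(), RMS.end(), Uses.front())
                  - RMS.begin();
      for (unsigned i = 0; i != NumGaps && ri != RMS.size(); ++i) {
        if (RMS[ri] > Uses[i + 1])
          continue;               // mask lies in a later gap
        RegMaskGaps.push_back(i); // gap i overlaps a register mask
        while (ri != RMS.size() && RMS[ri] <= Uses[i + 1])
          ++ri;                   // advance past all masks in this gap
      }

      for (unsigned G : RegMaskGaps)
        printf("gap %u clobbered\n", G); // gaps 1 and 2
      return 0;
    }

Later, when a candidate physreg fails clobberedByRegMask(), the recorded gaps get HUGE_VALF weight, which keeps the chosen split interval from spanning any call that clobbers that register.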