X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=9eed1fc62accc2381c7c41b4ff6150c9bc287690;hb=c496875f0c205ffadcec8060e1170e1c58e4eb55;hp=1f0c241dd20506940b1094a397a59b1d6b1a3fba;hpb=00005782fa860f4b48b3b5261d92541c61ee2495;p=oota-llvm.git

diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 1f0c241dd20..9eed1fc62ac 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -13,37 +13,34 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
 #include "AllocationOrder.h"
 #include "InterferenceCache.h"
 #include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
 #include "RegAllocBase.h"
-#include "Spiller.h"
 #include "SpillPlacement.h"
+#include "Spiller.h"
 #include "SplitKit.h"
-#include "VirtRegMap.h"
-#include "RegisterCoalescer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
-
+#include "llvm/Support/raw_ostream.h"
 #include <queue>
 
 using namespace llvm;
 
@@ -52,7 +49,14 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges");
 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
 STATISTIC(NumEvicted,      "Number of interferences evicted");
 
-cl::opt<bool> CompactRegions("compact-regions");
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+  cl::desc("Spill mode for splitting live ranges"),
+  cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+             clEnumValN(SplitEditor::SM_Size,  "size",  "Optimize for size"),
+             clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+             clEnumValEnd),
+  cl::init(SplitEditor::SM_Partition));
 
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
@@ -67,7 +71,6 @@ class RAGreedy : public MachineFunctionPass,
 
   // analyses
   SlotIndexes *Indexes;
-  LiveStacks *LS;
   MachineDominatorTree *DomTree;
   MachineLoopInfo *Loops;
   EdgeBundles *Bundles;
@@ -75,7 +78,7 @@ class RAGreedy : public MachineFunctionPass,
   LiveDebugVariables *DebugVars;
 
   // state
-  std::auto_ptr<Spiller> SpillerInstance;
+  OwningPtr<Spiller> SpillerInstance;
   std::priority_queue<std::pair<unsigned, unsigned> > Queue;
   unsigned NextCascade;
 
@@ -163,8 +166,8 @@ class RAGreedy : public MachineFunctionPass,
   };
 
   // splitting state.
-  std::auto_ptr<SplitAnalysis> SA;
-  std::auto_ptr<SplitEditor> SE;
+  OwningPtr<SplitAnalysis> SA;
+  OwningPtr<SplitEditor> SE;
 
   /// Cached per-block interference maps
   InterferenceCache IntfCache;
@@ -242,7 +245,6 @@ public:
   static char ID;
 
 private:
-  void LRE_WillEraseInstruction(MachineInstr*);
   bool LRE_CanEraseVirtReg(unsigned);
   void LRE_WillShrinkVirtReg(unsigned);
   void LRE_DidCloneVirtReg(unsigned, unsigned);
@@ -266,6 +268,10 @@ private:
                     SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
+  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+                         SmallVectorImpl<LiveInterval*>&);
+  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+                               SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -300,13 +306,14 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
-  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
   initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
   initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
   initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
   initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
 }
@@ -316,22 +323,22 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AliasAnalysis>();
   AU.addPreserved<AliasAnalysis>();
   AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
   AU.addRequired<SlotIndexes>();
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveDebugVariables>();
   AU.addPreserved<LiveDebugVariables>();
-  if (StrongPHIElim)
-    AU.addRequiredID(StrongPHIEliminationID);
-  AU.addRequiredTransitive<RegisterCoalescer>();
-  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
+  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
   AU.addPreserved<MachineLoopInfo>();
   AU.addRequired<VirtRegMap>();
   AU.addPreserved<VirtRegMap>();
+  AU.addRequired<LiveRegMatrix>();
+  AU.addPreserved<LiveRegMatrix>();
   AU.addRequired<EdgeBundles>();
   AU.addRequired<SpillPlacement>();
   MachineFunctionPass::getAnalysisUsage(AU);
@@ -342,14 +349,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
 // LiveRangeEdit delegate methods
 //===----------------------------------------------------------------------===//
 
-void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
-  // LRE itself will remove from SlotIndexes and parent basic block.
-  VRM->RemoveMachineInstrFromMaps(MI);
-}
-
 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
-  if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
-    unassign(LIS->getInterval(VirtReg), PhysReg);
+  if (VRM->hasPhys(VirtReg)) {
+    Matrix->unassign(LIS->getInterval(VirtReg));
     return true;
   }
   // Unassigned virtreg is probably in the priority queue.
@@ -358,17 +360,20 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
 }
 
 void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
-  unsigned PhysReg = VRM->getPhys(VirtReg);
-  if (!PhysReg)
+  if (!VRM->hasPhys(VirtReg))
     return;
 
   // Register is assigned, put it back on the queue for reassignment.
   LiveInterval &LI = LIS->getInterval(VirtReg);
-  unassign(LI, PhysReg);
+  Matrix->unassign(LI);
   enqueue(&LI);
 }
 
 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+  // Cloning a register we haven't even heard about yet? Just ignore it.
+  if (!ExtraRegInfo.inBounds(Old))
+    return;
+
   // LRE may clone a virtual register because dead code elimination causes it to
   // be split into connected components. The new components are much smaller
   // than the original, so they should get a new chance at being assigned.
@@ -382,7 +387,6 @@ void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
   ExtraRegInfo.clear();
   GlobalCand.clear();
-  RegAllocBase::releaseMemory();
 }
 
 void RAGreedy::enqueue(LiveInterval *LI) {
@@ -398,28 +402,27 @@ void RAGreedy::enqueue(LiveInterval *LI) {
   if (ExtraRegInfo[Reg].Stage == RS_New)
     ExtraRegInfo[Reg].Stage = RS_Assign;
 
-  if (ExtraRegInfo[Reg].Stage == RS_Split)
+  if (ExtraRegInfo[Reg].Stage == RS_Split) {
     // Unsplit ranges that couldn't be allocated immediately are deferred until
-    // everything else has been allocated. Long ranges are allocated last so
-    // they are split against realistic interference.
-    Prio = (1u << 31) - Size;
-  else {
-    // Everything else is allocated in long->short order. Long ranges that don't
-    // fit should be spilled ASAP so they don't create interference.
+    // everything else has been allocated.
+    Prio = Size;
+  } else {
+    // Everything is allocated in long->short order. Long ranges that don't fit
+    // should be spilled (or split) ASAP so they don't create interference.
     Prio = (1u << 31) + Size;
 
     // Boost ranges that have a physical register hint.
-    if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+    if (VRM->hasKnownPreference(Reg))
       Prio |= (1u << 30);
   }
 
-  Queue.push(std::make_pair(Prio, Reg));
+  Queue.push(std::make_pair(Prio, ~Reg));
 }
 
 LiveInterval *RAGreedy::dequeue() {
   if (Queue.empty())
     return 0;
-  LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+  LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
   Queue.pop();
   return LI;
 }
@@ -436,9 +439,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   Order.rewind();
   unsigned PhysReg;
   while ((PhysReg = Order.next()))
-    if (!checkPhysRegInterference(VirtReg, PhysReg))
+    if (!Matrix->checkInterference(VirtReg, PhysReg))
       break;
-  if (!PhysReg || Order.isHint(PhysReg))
+  if (!PhysReg || Order.isHint())
     return PhysReg;
 
   // PhysReg is available, but there may be a better choice.
@@ -503,12 +506,16 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
 ///
 /// @param VirtReg Live range that is about to be assigned.
 /// @param PhysReg Desired register for assignment.
-/// @prarm IsHint  True when PhysReg is VirtReg's preferred register.
+/// @param IsHint  True when PhysReg is VirtReg's preferred register.
 /// @param MaxCost Only look for cheaper candidates and update with new cost
 ///                when returning true.
 /// @returns True when interference can be evicted cheaper than MaxCost.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                     bool IsHint, EvictionCost &MaxCost) {
+  // It is only possible to evict virtual register interference.
+  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+    return false;
+
   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
   // involved in an eviction before. If a cascade number was assigned, deny
   // evicting anything with the same or a newer cascade number. This prevents
@@ -521,8 +528,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
     Cascade = NextCascade;
 
   EvictionCost Cost;
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
     // If there are 10 or more interferences, chances are one is heavier.
     if (Q.collectInterferingVRegs(10) >= 10)
       return false;
 
     // Check if any interfering live range is heavier than MaxWeight.
     for (unsigned i = Q.interferingVRegs().size(); i; --i) {
       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
-      if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
-        return false;
+      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+             "Only expecting virtual register interference from query");
       // Never evict spill products. They cannot split or spill.
       if (getStage(*Intf) == RS_Done)
         return false;
       // Once a live range becomes small enough, it is urgent that we find a
       // register for it. This is indicated by an infinite spill weight. These
       // urgent live ranges get to evict almost anything.
-      bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+      //
+      // Also allow urgent evictions of unspillable ranges from a strictly
+      // larger allocation order.
+      bool Urgent = !VirtReg.isSpillable() &&
+        (Intf->isSpillable() ||
+         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
       // Only evict older cascades or live ranges without a cascade.
       unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
       if (Cascade <= IntfCascade) {
@@ -579,19 +592,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
                << " interference: Cascade " << Cascade << '\n');
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+
+  // Collect all interfering virtregs first.
+  SmallVector<LiveInterval*, 8> Intfs;
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
     assert(Q.seenAllInterferences() && "Didn't check all interferences.");
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *Intf = Q.interferingVRegs()[i];
-      unassign(*Intf, VRM->getPhys(Intf->reg));
-      assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
-              VirtReg.isSpillable() < Intf->isSpillable()) &&
-             "Cannot decrease cascade number, illegal eviction");
-      ExtraRegInfo[Intf->reg].Cascade = Cascade;
-      ++NumEvicted;
-      NewVRegs.push_back(Intf);
-    }
+    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+    Intfs.append(IVR.begin(), IVR.end());
+  }
+
+  // Evict them second. This will invalidate the queries.
+  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+    LiveInterval *Intf = Intfs[i];
+    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+    if (!VRM->hasPhys(Intf->reg))
+      continue;
+    Matrix->unassign(*Intf);
+    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+            VirtReg.isSpillable() < Intf->isSpillable()) &&
+           "Cannot decrease cascade number, illegal eviction");
+    ExtraRegInfo[Intf->reg].Cascade = Cascade;
+    ++NumEvicted;
+    NewVRegs.push_back(Intf);
   }
 }
 
@@ -608,16 +631,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
   // Keep track of the cheapest interference seen so far.
   EvictionCost BestCost(~0u);
   unsigned BestPhys = 0;
+  unsigned OrderLimit = Order.getOrder().size();
 
   // When we are just looking for a reduced cost per use, don't break any
   // hints, and only evict smaller spill weights.
   if (CostPerUseLimit < ~0u) {
     BestCost.BrokenHints = 0;
     BestCost.MaxWeight = VirtReg.weight;
+
+    // Check if any registers in RC are below CostPerUseLimit.
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+    unsigned MinCost = RegClassInfo.getMinCost(RC);
+    if (MinCost >= CostPerUseLimit) {
+      DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+                   << ", no cheaper registers to be found.\n");
+      return 0;
+    }
+
+    // It is normal for register classes to have a long tail of registers with
+    // the same cost. We don't need to look at them if they're too expensive.
+    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+      OrderLimit = RegClassInfo.getLastCostChange(RC);
+      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+    }
   }
 
   Order.rewind();
-  while (unsigned PhysReg = Order.next()) {
+  while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
       continue;
     // The first use of a callee-saved register in a function has cost 1.
@@ -637,7 +677,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
     BestPhys = PhysReg;
 
     // Stop if the hint can be used.
-    if (Order.isHint(PhysReg))
+    if (Order.isHint())
       break;
   }
 
@@ -673,6 +713,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     Intf.moveToBlock(BC.Number);
     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
     BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BC.ChangesValue = BI.FirstDef;
 
     if (!Intf.hasInterference())
       continue;
@@ -684,9 +725,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     if (BI.LiveIn) {
       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
         BC.Entry = SpillPlacement::MustSpill, ++Ins;
-      else if (Intf.first() < BI.FirstUse)
+      else if (Intf.first() < BI.FirstInstr)
         BC.Entry = SpillPlacement::PrefSpill, ++Ins;
-      else if (Intf.first() < BI.LastUse)
+      else if (Intf.first() < BI.LastInstr)
        ++Ins;
     }
 
@@ -694,9 +735,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     if (BI.LiveOut) {
       if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
         BC.Exit = SpillPlacement::MustSpill, ++Ins;
-      else if (Intf.last() > BI.LastUse)
+      else if (Intf.last() > BI.LastInstr)
         BC.Exit = SpillPlacement::PrefSpill, ++Ins;
-      else if (Intf.last() > BI.FirstUse)
+      else if (Intf.last() > BI.FirstInstr)
         ++Ins;
     }
 
@@ -802,7 +843,9 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
       if (Cand.PhysReg)
         addThroughConstraints(Cand.Intf, NewBlocks);
       else
-        SpillPlacer->addPrefSpill(NewBlocks);
+        // Provide a strong negative bias on through blocks to prevent unwanted
+        // liveness on loop backedges.
+        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
     AddedTo = ActiveBlocks.size();
 
     // Perhaps iterating can enable more bundles?
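
The evictInterference() rewrite above splits the work into two phases: collect every interfering live range from every register unit, then unassign. The reason is that unassigning mutates the live interval unions that back the interference queries, so mutating while still iterating a query's results would use invalidated state. A minimal standalone sketch of that pattern follows; the types Range, Union, and evictAll are invented for illustration and are not the LLVM classes, only the two-phase shape mirrors the patch:

    #include <algorithm>
    #include <vector>

    struct Range { int Id; bool Assigned = true; };

    struct Union {
      std::vector<Range*> Members;
      // Toy "query": every member interferes.
      const std::vector<Range*> &interfering() const { return Members; }
      void unassign(Range *R) {
        R->Assigned = false;
        // Erasing invalidates any outstanding iteration over Members,
        // which is exactly why the collect phase must come first.
        Members.erase(std::remove(Members.begin(), Members.end(), R),
                      Members.end());
      }
    };

    void evictAll(std::vector<Union*> RegUnits) {
      // Phase 1: collect from every register unit. The same range may
      // appear in several units, so duplicates are possible.
      std::vector<Range*> Intfs;
      for (Union *U : RegUnits) {
        const std::vector<Range*> &IVR = U->interfering();
        Intfs.insert(Intfs.end(), IVR.begin(), IVR.end());
      }
      // Phase 2: mutate. Skip ranges already evicted through another
      // unit, the way the patch skips !VRM->hasPhys(Intf->reg).
      for (Range *R : Intfs) {
        if (!R->Assigned)
          continue;
        for (Union *U : RegUnits)
          U->unassign(R);
      }
    }
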
@@ -860,7 +903,6 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
 /// SA around all use blocks instead of forming bundle regions.
 float RAGreedy::calcSpillCost() {
   float Cost = 0;
-  const LiveInterval &LI = SA->getParent();
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -869,16 +911,8 @@ float RAGreedy::calcSpillCost() {
       Cost += SpillPlacer->getBlockFrequency(Number);
 
     // Unless the value is redefined in the block.
-    if (BI.LiveIn && BI.LiveOut) {
-      SlotIndex Start, Stop;
-      tie(Start, Stop) = Indexes->getMBBRange(Number);
-      LiveInterval::const_iterator I = LI.find(Start);
-      assert(I != LI.end() && "Expected live-in value");
-      // Is there a different live-out value? If so, we need an extra spill
-      // instruction.
-      if (I->end < Stop)
-        Cost += SpillPlacer->getBlockFrequency(Number);
-    }
+    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+      Cost += SpillPlacer->getBlockFrequency(Number);
   }
   return Cost;
 }
@@ -945,6 +979,12 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
   DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
   assert(NumGlobalIntvs && "No global intervals configured");
 
+  // Isolate even single instructions when dealing with a proper sub-class.
+  // That guarantees register class inflation for the stack interval because it
+  // is all copies.
+  unsigned Reg = SA->getParent().reg;
+  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
   // First handle all the blocks with uses.
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -974,7 +1014,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
     // Create separate intervals for isolated blocks with multiple uses.
     if (!IntvIn && !IntvOut) {
       DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
-      if (!BI.isOneInstr())
+      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
         SE->splitSingleBlock(BI);
       continue;
     }
@@ -1027,7 +1067,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
 
   SmallVector<unsigned, 8> IntvMap;
   SE->finish(&IntvMap);
-  DebugVars->splitRegister(SA->getParent().reg, LREdit.regs());
+  DebugVars->splitRegister(Reg, LREdit.regs());
 
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
   unsigned OrigBlocks = SA->getNumLiveBlocks();
@@ -1079,7 +1119,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   SmallVector<unsigned, 8> UsedCands;
 
   // Check if we can split this live range around a compact region.
-  bool HasCompact = CompactRegions && calcCompactRegion(GlobalCand.front());
+  bool HasCompact = calcCompactRegion(GlobalCand.front());
   if (HasCompact) {
     // Yes, keep GlobalCand[0] as the compact region candidate.
     NumCands = 1;
@@ -1107,6 +1147,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       }
       --NumCands;
       GlobalCand[Worst] = GlobalCand[NumCands];
+      if (BestCand == NumCands)
+        BestCand = Worst;
     }
 
     if (GlobalCand.size() <= NumCands)
@@ -1161,8 +1203,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     return 0;
 
   // Prepare split editor.
-  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
-  SE->reset(LREdit);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  SE->reset(LREdit, SplitSpillMode);
 
   // Assign all edge bundles to the preferred candidate, or NoCand.
   BundleCand.assign(Bundles->getNumBundles(), NoCand);
 
@@ -1175,6 +1217,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       Cand.IntvIdx = SE->openIntv();
       DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                    << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+      (void)B;
     }
   }
 
@@ -1187,6 +1230,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     Cand.IntvIdx = SE->openIntv();
     DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                  << Cand.IntvIdx << ".\n");
+    (void)B;
   }
 
@@ -1195,6 +1239,111 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   splitAroundRegion(LREdit, UsedCands);
   return 0;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                            Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+  unsigned Reg = VirtReg.reg;
+  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  SE->reset(LREdit, SplitSpillMode);
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+      SE->splitSingleBlock(BI);
+  }
+  // No blocks were split.
+  if (LREdit.empty())
+    return 0;
+
+  // We did split for some blocks.
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+
+  // Tell LiveDebugVariables about the new ranges.
+  DebugVars->splitRegister(Reg, LREdit.regs());
+
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+  // Sort out the new intervals created by splitting. The remainder interval
+  // goes straight to spilling, the new local ranges get to stay RS_New.
+  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+    LiveInterval &LI = *LREdit.get(i);
+    if (getStage(LI) == RS_New && IntvMap[i] == 0)
+      setStage(LI, RS_Spill);
+  }
+
+  if (VerifyEnabled)
+    MF->verify(this, "After splitting live range around basic blocks");
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                              SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // There is no point to this if there are no larger sub-classes.
+  if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+    return 0;
+
+  // Always enable split spill mode, since we're effectively spilling to a
+  // register.
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  SE->reset(LREdit, SplitEditor::SM_Size);
+
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+  if (Uses.size() <= 1)
+    return 0;
+
+  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+  // Split around every non-copy instruction.
+  for (unsigned i = 0; i != Uses.size(); ++i) {
+    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+      if (MI->isFullCopy()) {
+        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
+        continue;
+      }
+    SE->openIntv();
+    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+    SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+    SE->useIntv(SegStart, SegStop);
+  }
+
+  if (LREdit.empty()) {
+    DEBUG(dbgs() << "All uses were copies.\n");
+    return 0;
+  }
+
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+  // Assign all new registers to RS_Spill. This was the last chance.
+  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+  return 0;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                             Local Splitting
 //===----------------------------------------------------------------------===//
 
@@ -1209,29 +1358,32 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
                               SmallVectorImpl<float> &GapWeight) {
   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
-  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
   const unsigned NumGaps = Uses.size()-1;
 
   // Start and end points for the interference check.
-  SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
-  SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+  SlotIndex StartIdx =
+    BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+  SlotIndex StopIdx =
+    BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
 
   GapWeight.assign(NumGaps, 0.0f);
 
   // Add interference from each overlapping register.
-  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
-    if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
-           .checkInterference())
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+          .checkInterference())
       continue;
 
-    // We know that VirtReg is a continuous interval from FirstUse to LastUse,
-    // so we don't need InterferenceQuery.
+    // We know that VirtReg is a continuous interval from FirstInstr to
+    // LastInstr, so we don't need InterferenceQuery.
     //
     // Interference that overlaps an instruction is counted in both gaps
     // surrounding the instruction. The exception is interference before
     // StartIdx and after StopIdx.
    //
-    LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+    LiveIntervalUnion::SegmentIter IntI =
+      Matrix->getLiveUnions()[*Units].find(StartIdx);
     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
       // Skip the gaps before IntI.
       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1251,6 +1403,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
         break;
     }
   }
+
+  // Add fixed interference.
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    const LiveInterval &LI = LIS->getRegUnit(*Units);
+    LiveInterval::const_iterator I = LI.find(StartIdx);
+    LiveInterval::const_iterator E = LI.end();
+
+    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+      while (Uses[Gap+1].getBoundaryIndex() < I->start)
+        if (++Gap == NumGaps)
+          break;
+      if (Gap == NumGaps)
+        break;
+
+      for (; Gap != NumGaps; ++Gap) {
+        GapWeight[Gap] = HUGE_VALF;
+        if (Uses[Gap+1].getBaseIndex() >= I->end)
+          break;
+      }
+      if (Gap == NumGaps)
+        break;
+    }
+  }
 }
 
 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
@@ -1265,10 +1441,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   // while only covering a single block - A phi-def can use undef values from
   // predecessors, and the block could be a single-block loop.
   // We don't bother doing anything clever about such a case, we simply assume
-  // that the interval is continuous from FirstUse to LastUse. We should make
-  // sure that we don't do anything illegal to such an interval, though.
+  // that the interval is continuous from FirstInstr to LastInstr. We should
+  // make sure that we don't do anything illegal to such an interval, though.
 
-  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
   if (Uses.size() <= 2)
     return 0;
   const unsigned NumGaps = Uses.size()-1;
 
@@ -1276,10 +1452,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   DEBUG({
     dbgs() << "tryLocalSplit: ";
     for (unsigned i = 0, e = Uses.size(); i != e; ++i)
-      dbgs() << ' ' << SA->UseSlots[i];
+      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
   });
 
+  // If VirtReg is live across any register mask operands, compute a list of
+  // gaps with register masks.
+  SmallVector<unsigned, 8> RegMaskGaps;
+  if (Matrix->checkRegMaskInterference(VirtReg)) {
+    // Get regmask slots for the whole block.
+    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+    // Constrain to VirtReg's live range.
+    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+                                   Uses.front().getRegSlot()) - RMS.begin();
+    unsigned re = RMS.size();
+    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+      // Look for Uses[i] <= RMS <= Uses[i+1].
+      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+        continue;
+      // Skip a regmask on the same instruction as the last use. It doesn't
+      // overlap the live range.
+      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+        break;
+      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+      RegMaskGaps.push_back(i);
+      // Advance ri to the next gap. A regmask on one of the uses counts in
+      // both gaps.
+      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+        ++ri;
+    }
+    DEBUG(dbgs() << '\n');
+  }
+
   // Since we allow local split results to be split again, there is a risk of
   // creating infinite loops. It is tempting to require that the new live
   // ranges have fewer instructions than the original. That would guarantee
@@ -1314,6 +1520,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
     calcGapWeights(PhysReg, GapWeight);
 
+    // Remove any gaps with regmask clobbers.
+    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+        GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
     // Try to find the best sequence of gaps to close.
     // The new spill weight must be larger than any gap interference.
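
Both the virtual-register loop and the new fixed-interference loop in calcGapWeights() above use the same walking pattern: the use slots are sorted points, gap i is the span between Uses[i] and Uses[i+1], and each interference segment raises the weight of every gap it overlaps (infinitely, for fixed interference and regmask gaps). A standalone sketch of that walk, with invented types rather than the LLVM API:

    #include <algorithm>
    #include <vector>

    // Hypothetical stand-in for an interference segment [Start, End).
    struct Segment { int Start, End; float Weight; };

    void addGapWeights(const std::vector<int> &Uses,      // sorted use points
                       const std::vector<Segment> &Segs,  // sorted by Start
                       std::vector<float> &GapWeight) {
      if (Uses.size() < 2)
        return;
      const unsigned NumGaps = Uses.size() - 1;
      GapWeight.assign(NumGaps, 0.0f);
      unsigned Gap = 0;
      for (const Segment &S : Segs) {
        // Skip gaps that end before the segment starts.
        while (Gap != NumGaps && Uses[Gap + 1] < S.Start)
          ++Gap;
        if (Gap == NumGaps)
          break;
        // Raise every gap the segment overlaps; a segment touching an
        // instruction counts in both surrounding gaps, as in the original.
        for (unsigned G = Gap; G != NumGaps && Uses[G] < S.End; ++G)
          GapWeight[G] = std::max(GapWeight[G], S.Weight);
      }
    }
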
@@ -1405,7 +1616,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                << '-' << Uses[BestAfter] << ", " << BestDiff
                << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
 
-  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit);
 
   SE->openIntv();
@@ -1446,19 +1657,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                             SmallVectorImpl<LiveInterval*>&NewVRegs) {
+  // Ranges must be Split2 or less.
+  if (getStage(VirtReg) >= RS_Spill)
+    return 0;
+
   // Local intervals are handled separately.
   if (LIS->intervalIsInOneMBB(VirtReg)) {
     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
     SA->analyze(&VirtReg);
-    return tryLocalSplit(VirtReg, Order, NewVRegs);
+    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+    if (PhysReg || !NewVRegs.empty())
+      return PhysReg;
+    return tryInstructionSplit(VirtReg, Order, NewVRegs);
   }
 
   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
 
-  // Ranges must be Split2 or less.
-  if (getStage(VirtReg) >= RS_Spill)
-    return 0;
-
   SA->analyze(&VirtReg);
 
   // FIXME: SplitAnalysis may repair broken live ranges coming from the
@@ -1467,7 +1681,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
   // an assertion when the coalescer is fixed.
   if (SA->didRepairRange()) {
     // VirtReg has changed, so all cached queries are invalid.
-    invalidateVirtRegs();
+    Matrix->invalidateVirtRegs();
     if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
       return PhysReg;
   }
@@ -1481,19 +1695,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
     return PhysReg;
   }
 
-  // Then isolate blocks with multiple uses.
-  SplitAnalysis::BlockPtrSet Blocks;
-  if (SA->getMultiUseBlocks(Blocks)) {
-    LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
-    SE->reset(LREdit);
-    SE->splitSingleBlocks(Blocks);
-    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
-    if (VerifyEnabled)
-      MF->verify(this, "After splitting live range around basic blocks");
-  }
-
-  // Don't assign any physregs.
-  return 0;
+  // Then isolate blocks.
+  return tryBlockSplit(VirtReg, Order, NewVRegs);
 }
 
@@ -1543,7 +1746,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
 
   // Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1557,14 +1760,15 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); - RegAllocBase::init(getAnalysis(), getAnalysis()); + RegAllocBase::init(getAnalysis(), + getAnalysis(), + getAnalysis()); Indexes = &getAnalysis(); DomTree = &getAnalysis(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); @@ -1578,24 +1782,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); - addMBBLiveIns(MF); - LIS->addKillFlags(); - - // Run rewriter - { - NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); - VRM->rewrite(Indexes); - } - - // Write out new DBG_VALUE instructions. - DebugVars->emitDebugValues(VRM); - - // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); - return true; }