diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 59e9e4501a2..c608b8d1c4a 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -13,14 +13,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regcoalescing"
-#include "llvm/CodeGen/SimpleRegisterCoalescing.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "SimpleRegisterCoalescing.h"
 #include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/Value.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/CodeGen/RegisterCoalescer.h"
@@ -47,6 +47,16 @@ namespace {
                 cl::desc("Coalesce copies (default=true)"),
                 cl::init(true));
 
+  static cl::opt<bool>
+  NewHeuristic("new-coalescer-heuristic",
+               cl::desc("Use new coalescer heuristic"),
+               cl::init(false));
+
+  static cl::opt<bool>
+  ReMatSpillWeight("tweak-remat-spill-weight",
+                   cl::desc("Tweak spill weight of re-materializable intervals"),
+                   cl::init(true));
+
   RegisterPass<SimpleRegisterCoalescing>
   X("simple-register-coalescing", "Simple Register Coalescing");
 
@@ -57,13 +67,12 @@ namespace {
 const PassInfo *llvm::SimpleRegisterCoalescingID = X.getPassInfo();
 
 void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
-  //AU.addPreserved<LiveVariables>();
   AU.addPreserved<LiveIntervals>();
   AU.addPreservedID(PHIEliminationID);
   AU.addPreservedID(TwoAddressInstructionPassID);
   AU.addRequired<LiveIntervals>();
   AU.addRequired<LiveVariables>();
-  AU.addRequired<LoopInfo>();
+  AU.addRequired<MachineLoopInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -178,59 +187,155 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval
   if (UIdx != -1)
     ValLREndInst->getOperand(UIdx).unsetIsKill();
 
-  // Finally, delete the copy instruction.
-  li_->RemoveMachineInstrFromMaps(CopyMI);
-  CopyMI->eraseFromParent();
   ++numPeep;
   return true;
 }
 
+/// AddSubRegIdxPairs - Recursively mark all the registers represented by the
+/// specified register as sub-registers. The recursion level is expected to be
+/// shallow.
+void SimpleRegisterCoalescing::AddSubRegIdxPairs(unsigned Reg, unsigned SubIdx) {
+  std::vector<unsigned> &JoinedRegs = r2rRevMap_[Reg];
+  for (unsigned i = 0, e = JoinedRegs.size(); i != e; ++i) {
+    SubRegIdxes.push_back(std::make_pair(JoinedRegs[i], SubIdx));
+    AddSubRegIdxPairs(JoinedRegs[i], SubIdx);
+  }
+}
+
+/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy.
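+/// That is, the copy occurs in the latch block of a loop and the value it
+/// defines is killed exactly once, at the end of that block.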
+///
+bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
+                                              unsigned DstReg) {
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  const MachineLoop *L = loopInfo->getLoopFor(MBB);
+  if (!L)
+    return false;
+  if (MBB != L->getLoopLatch())
+    return false;
+
+  DstReg = rep(DstReg);
+  LiveInterval &LI = li_->getInterval(DstReg);
+  unsigned DefIdx = li_->getInstructionIndex(CopyMI);
+  LiveInterval::const_iterator DstLR =
+    LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx));
+  if (DstLR == LI.end())
+    return false;
+  unsigned KillIdx = li_->getInstructionIndex(&MBB->back()) + InstrSlots::NUM-1;
+  if (DstLR->valno->kills.size() == 1 && DstLR->valno->kills[0] == KillIdx)
+    return true;
+  return false;
+}
+
 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
 /// which are the src/dst of the copy instruction CopyMI.  This returns true
-/// if the copy was successfully coalesced away, or if it is never possible
-/// to coalesce this copy, due to register constraints.  It returns
-/// false if it is not currently possible to coalesce this interval, but
-/// it may be possible if other things get coalesced.
-bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
-                                        unsigned SrcReg, unsigned DstReg, bool PhysOnly) {
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec TheCopy, bool &Again) {
+  MachineInstr *CopyMI = TheCopy.MI;
+
+  Again = false;
+  if (JoinedCopies.count(CopyMI))
+    return false; // Already done.
+
   DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
 
   // Get representative registers.
+  unsigned SrcReg = TheCopy.SrcReg;
+  unsigned DstReg = TheCopy.DstReg;
   unsigned repSrcReg = rep(SrcReg);
   unsigned repDstReg = rep(DstReg);
 
   // If they are already joined we continue.
   if (repSrcReg == repDstReg) {
     DOUT << "\tCopy already coalesced.\n";
-    return true;  // Not coalescable.
+    return false;  // Not coalescable.
   }
 
   bool SrcIsPhys = MRegisterInfo::isPhysicalRegister(repSrcReg);
   bool DstIsPhys = MRegisterInfo::isPhysicalRegister(repDstReg);
-  if (PhysOnly && !SrcIsPhys && !DstIsPhys)
-    // Only joining physical registers with virtual registers in this round.
-    return true;
 
   // If they are both physical registers, we cannot join them.
   if (SrcIsPhys && DstIsPhys) {
     DOUT << "\tCan not coalesce physregs.\n";
-    return true;  // Not coalescable.
+    return false;  // Not coalescable.
   }
 
   // We only join virtual registers with allocatable physical registers.
   if (SrcIsPhys && !allocatableRegs_[repSrcReg]) {
     DOUT << "\tSrc reg is unallocatable physreg.\n";
-    return true;  // Not coalescable.
+    return false;  // Not coalescable.
   }
   if (DstIsPhys && !allocatableRegs_[repDstReg]) {
     DOUT << "\tDst reg is unallocatable physreg.\n";
-    return true;  // Not coalescable.
+    return false;  // Not coalescable.
   }
-
-  // If they are not of the same register class, we cannot join them.
-  if (differingRegisterClasses(repSrcReg, repDstReg)) {
+
+  bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+  unsigned RealDstReg = 0;
+  if (isExtSubReg) {
+    unsigned SubIdx = CopyMI->getOperand(2).getImm();
+    if (SrcIsPhys)
+      // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+      // coalesced with AX.
+      repSrcReg = mri_->getSubReg(repSrcReg, SubIdx);
+    else if (DstIsPhys) {
+      // If this is an extract_subreg where dst is a physical register, e.g.
+      // cl = EXTRACT_SUBREG reg1024, 1
+      // then create and update the actual physical register allocated to RHS.
+      const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(repSrcReg);
+      for (const unsigned *SRs = mri_->getSuperRegisters(repDstReg);
+           unsigned SR = *SRs; ++SRs) {
+        if (repDstReg == mri_->getSubReg(SR, SubIdx) &&
+            RC->contains(SR)) {
+          RealDstReg = SR;
+          break;
+        }
+      }
+      assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+      // For this type of EXTRACT_SUBREG, conservatively
+      // check if the live interval of the source register interferes with the
+      // actual super physical register we are trying to coalesce with.
+      LiveInterval &RHS = li_->getInterval(repSrcReg);
+      if (li_->hasInterval(RealDstReg) &&
+          RHS.overlaps(li_->getInterval(RealDstReg))) {
+        DOUT << "Interfere with register ";
+        DEBUG(li_->getInterval(RealDstReg).print(DOUT, mri_));
+        return false; // Not coalescable
+      }
+      for (const unsigned* SR = mri_->getSubRegisters(RealDstReg); *SR; ++SR)
+        if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+          DOUT << "Interfere with sub-register ";
+          DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+          return false; // Not coalescable
+        }
+    } else {
+      unsigned SrcSize = li_->getInterval(repSrcReg).getSize() / InstrSlots::NUM;
+      unsigned DstSize = li_->getInterval(repDstReg).getSize() / InstrSlots::NUM;
+      const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(repDstReg);
+      unsigned Threshold = allocatableRCRegs_[RC].count();
+      // Be conservative. If both sides are virtual registers, do not coalesce
+      // if this will cause a high use density interval to target a smaller set
+      // of registers.
+      if (DstSize > Threshold || SrcSize > Threshold) {
+        LiveVariables::VarInfo &svi = lv_->getVarInfo(repSrcReg);
+        LiveVariables::VarInfo &dvi = lv_->getVarInfo(repDstReg);
+        if ((float)dvi.NumUses / DstSize < (float)svi.NumUses / SrcSize) {
+          Again = true;  // May be possible to coalesce later.
+          return false;
+        }
+      }
+    }
+  } else if (differingRegisterClasses(repSrcReg, repDstReg)) {
+    // If they are not of the same register class, we cannot join them.
     DOUT << "\tSrc/Dest are different register classes.\n";
-    return true;  // Not coalescable.
+    // Allow the coalescer to try again in case either side gets coalesced to
+    // a physical register that's compatible with the other side. e.g.
+    // r1024 = MOV32to32_ r1025
+    // but later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+    Again = true;  // May be possible to coalesce later.
+    return false;
   }
 
   LiveInterval &SrcInt = li_->getInterval(repSrcReg);
@@ -286,14 +391,16 @@ bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
   // virtual register. Once the coalescing is done, it cannot be broken and
   // these are not spillable! If the destination interval uses are far away,
   // think twice about coalescing them!
-  if (!mopd->isDead() && (SrcIsPhys || DstIsPhys)) {
+  if (!mopd->isDead() && (SrcIsPhys || DstIsPhys) && !isExtSubReg) {
     LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
     unsigned JoinVReg = SrcIsPhys ? repDstReg : repSrcReg;
     unsigned JoinPReg = SrcIsPhys ? repSrcReg : repDstReg;
     const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(JoinVReg);
     unsigned Threshold = allocatableRCRegs_[RC].count();
+    if (TheCopy.isBackEdge)
+      Threshold *= 2; // Favors back edge copies.
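+    // (A back edge copy executes on every loop iteration, so it is worth
+    // tolerating an interval up to twice as long before aborting.)
 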
-    // If the virtual register live interval is long has it has low use desity,
+    // If the virtual register live interval is long but it has low use density,
     // do not join them, instead mark the physical register as its allocation
     // preference.
     unsigned Length = JoinVInt.getSize() / InstrSlots::NUM;
@@ -303,6 +410,7 @@ bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
       JoinVInt.preference = JoinPReg;
       ++numAborts;
       DOUT << "\tMay tie down a physical register, abort!\n";
+      Again = true;  // May be possible to coalesce later.
       return false;
     }
   }
@@ -340,11 +448,14 @@ bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
     // Coalescing failed.
 
     // If we can eliminate the copy without merging the live ranges, do so now.
-    if (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI))
+    if (!isExtSubReg && AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI)) {
+      JoinedCopies.insert(CopyMI);
       return true;
+    }
 
     // Otherwise, we are unable to join the intervals.
     DOUT << "Interference!\n";
+    Again = true;  // May be possible to coalesce later.
     return false;
   }
 
@@ -368,9 +479,32 @@ bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
     unsetRegisterKills(I->start, I->end, repDstReg);
   }
 
+  // If this is an extract_subreg where dst is a physical register, e.g.
+  // cl = EXTRACT_SUBREG reg1024, 1
+  // then create and update the actual physical register allocated to RHS.
+  if (RealDstReg) {
+    LiveInterval &RealDstInt = li_->getOrCreateInterval(RealDstReg);
+    SmallSet<const VNInfo*, 4> CopiedValNos;
+    for (LiveInterval::Ranges::const_iterator I = ResSrcInt->ranges.begin(),
+           E = ResSrcInt->ranges.end(); I != E; ++I) {
+      LiveInterval::const_iterator DstLR =
+        ResDstInt->FindLiveRangeContaining(I->start);
+      assert(DstLR != ResDstInt->end() && "Invalid joined interval!");
+      const VNInfo *DstValNo = DstLR->valno;
+      if (CopiedValNos.insert(DstValNo)) {
+        VNInfo *ValNo = RealDstInt.getNextValue(DstValNo->def, DstValNo->reg,
+                                                li_->getVNInfoAllocator());
+        ValNo->hasPHIKill = DstValNo->hasPHIKill;
+        RealDstInt.addKills(ValNo, DstValNo->kills);
+        RealDstInt.MergeValueInAsValue(*ResDstInt, DstValNo, ValNo);
+      }
+    }
+    repDstReg = RealDstReg;
+  }
+
   // Update the liveintervals of sub-registers.
   for (const unsigned *AS = mri_->getSubRegisters(repDstReg); *AS; ++AS)
-    li_->getInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+    li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
                                                li_->getVNInfoAllocator());
   } else {
     // Merge use info if the destination is a virtual register.
@@ -379,22 +513,51 @@ bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
     dVI.NumUses += sVI.NumUses;
   }
 
-  DOUT << "\n\t\tJoined.  Result = "; ResDstInt->print(DOUT, mri_);
-  DOUT << "\n";
-
   // Remember these liveintervals have been joined.
   JoinedLIs.set(repSrcReg - MRegisterInfo::FirstVirtualRegister);
   if (MRegisterInfo::isVirtualRegister(repDstReg))
     JoinedLIs.set(repDstReg - MRegisterInfo::FirstVirtualRegister);
 
+  if (isExtSubReg && !SrcIsPhys && !DstIsPhys) {
+    if (!Swapped) {
+      // Make sure we allocate the larger super-register.
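+      // The EXTRACT_SUBREG source is the super-register. If the join kept
+      // the sub-register's interval, copy the result over and swap the two
+      // so the super-register is the representative that survives.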
+      ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+      std::swap(repSrcReg, repDstReg);
+      std::swap(ResSrcInt, ResDstInt);
+    }
+    unsigned SubIdx = CopyMI->getOperand(2).getImm();
+    SubRegIdxes.push_back(std::make_pair(repSrcReg, SubIdx));
+    AddSubRegIdxPairs(repSrcReg, SubIdx);
+  }
+
+  if (NewHeuristic) {
+    for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
+           e = ResSrcInt->vni_end(); i != e; ++i) {
+      const VNInfo *vni = *i;
+      if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+        MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+        unsigned SrcReg, DstReg;
+        if (CopyMI && tii_->isMoveInstr(*CopyMI, SrcReg, DstReg) &&
+            JoinedCopies.count(CopyMI) == 0) {
+          unsigned LoopDepth = loopInfo->getLoopDepth(CopyMI->getParent());
+          JoinQueue->push(CopyRec(CopyMI, SrcReg, DstReg, LoopDepth,
+                                  isBackEdgeCopy(CopyMI, DstReg)));
+        }
+      }
+    }
+  }
+
+  DOUT << "\n\t\tJoined.  Result = "; ResDstInt->print(DOUT, mri_);
+  DOUT << "\n";
+
   // repSrcReg is guaranteed to be the register whose live interval is being
   // merged.
   li_->removeInterval(repSrcReg);
   r2rMap_[repSrcReg] = repDstReg;
+  r2rRevMap_[repDstReg].push_back(repSrcReg);
 
   // Finally, delete the copy instruction.
-  li_->RemoveMachineInstrFromMaps(CopyMI);
-  CopyMI->eraseFromParent();
+  JoinedCopies.insert(CopyMI);
   ++numPeep;
   ++numJoins;
   return true;
@@ -575,6 +738,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS)
   // Okay, the final step is to loop over the RHS live intervals, adding them to
   // the LHS.
+  LHSValNo->hasPHIKill |= VNI->hasPHIKill;
   LHS.addKills(LHSValNo, VNI->kills);
   LHS.MergeRangesInAsValue(RHS, LHSValNo);
   LHS.weight += RHS.weight;
@@ -698,7 +862,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS,
        i != e; ++i) {
     VNInfo *VNI = *i;
     unsigned ValSrcReg = VNI->reg;
-    if (ValSrcReg == 0)  // Src not defined by a copy?
+    if (VNI->def == ~1U || ValSrcReg == 0)  // Src not defined by a copy?
       continue;
 
     // DstReg is known to be a register in the LHS interval.  If the src is
@@ -716,7 +880,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS,
        i != e; ++i) {
     VNInfo *VNI = *i;
     unsigned ValSrcReg = VNI->reg;
-    if (ValSrcReg == 0)  // Src not defined by a copy?
+    if (VNI->def == ~1U || ValSrcReg == 0)  // Src not defined by a copy?
       continue;
 
     // DstReg is known to be a register in the RHS interval.  If the src is
@@ -804,32 +968,34 @@ bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS,
     }
   }
 
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+    NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+    NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+  }
+
   // If we get here, we know that we can coalesce the live ranges.  Ask the
   // intervals to coalesce themselves now.
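   // Join the interval with fewer live ranges into the one with more, and
   // always join a virtual register interval into a physical one.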
   if ((RHS.ranges.size() > LHS.ranges.size() &&
        MRegisterInfo::isVirtualRegister(LHS.reg)) ||
       MRegisterInfo::isPhysicalRegister(RHS.reg)) {
-    // Update kill info. Some live ranges are extended due to copy coalescing.
-    for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
-           E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
-      VNInfo *VNI = I->first;
-      unsigned LHSValID = LHSValNoAssignments[VNI->id];
-      LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
-      RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
-    }
-
     RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
     Swapped = true;
   } else {
-    // Update kill info. Some live ranges are extended due to copy coalescing.
-    for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
-           E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
-      VNInfo *VNI = I->first;
-      unsigned RHSValID = RHSValNoAssignments[VNI->id];
-      LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
-      LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
-    }
-
     LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
     Swapped = false;
   }
@@ -849,37 +1015,124 @@ namespace {
   };
 }
 
+/// getRepIntervalSize - Returns the size of the interval that represents the
+/// specified register.
+template <class SF>
+unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
+  return Rc->getRepIntervalSize(Reg);
+}
+
+/// CopyRecSort::operator() - Join priority queue sorting function.
+///
+bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
+  // Inner loops first.
+  if (left.LoopDepth > right.LoopDepth)
+    return false;
+  else if (left.LoopDepth == right.LoopDepth) {
+    if (left.isBackEdge && !right.isBackEdge)
+      return false;
+    else if (left.isBackEdge == right.isBackEdge) {
+      // Join virtuals to physical registers first.
+      bool LDstIsPhys = MRegisterInfo::isPhysicalRegister(left.DstReg);
+      bool LSrcIsPhys = MRegisterInfo::isPhysicalRegister(left.SrcReg);
+      bool LIsPhys = LDstIsPhys || LSrcIsPhys;
+      bool RDstIsPhys = MRegisterInfo::isPhysicalRegister(right.DstReg);
+      bool RSrcIsPhys = MRegisterInfo::isPhysicalRegister(right.SrcReg);
+      bool RIsPhys = RDstIsPhys || RSrcIsPhys;
+      if (LIsPhys && !RIsPhys)
+        return false;
+      else if (LIsPhys == RIsPhys) {
+        // Join shorter intervals first.
+        unsigned LSize = 0;
+        unsigned RSize = 0;
+        if (LIsPhys) {
+          LSize  = LDstIsPhys ? 0 : JPQ->getRepIntervalSize(left.DstReg);
+          LSize += LSrcIsPhys ? 0 : JPQ->getRepIntervalSize(left.SrcReg);
+          RSize  = RDstIsPhys ? 0 : JPQ->getRepIntervalSize(right.DstReg);
+          RSize += RSrcIsPhys ? 0 : JPQ->getRepIntervalSize(right.SrcReg);
+        } else {
+          LSize = std::min(JPQ->getRepIntervalSize(left.DstReg),
+                           JPQ->getRepIntervalSize(left.SrcReg));
+          RSize = std::min(JPQ->getRepIntervalSize(right.DstReg),
+                           JPQ->getRepIntervalSize(right.SrcReg));
+        }
+        if (LSize < RSize)
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
 void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
-                                                 std::vector<CopyRec> *TryAgain, bool PhysOnly) {
+                                                 std::vector<CopyRec> &TryAgain) {
   DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
-
+
+  std::vector<CopyRec> VirtCopies;
+  std::vector<CopyRec> PhysCopies;
+  unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
   for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
        MII != E;) {
     MachineInstr *Inst = MII++;
 
-    // If this isn't a copy, we can't join intervals.
+    // If this isn't a copy nor an extract_subreg, we can't join intervals.
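+    // For an EXTRACT_SUBREG, operand 0 is the sub-register def and
+    // operand 1 is the super-register source.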
     unsigned SrcReg, DstReg;
-    if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue;
-
-    bool Done = JoinCopy(Inst, SrcReg, DstReg, PhysOnly);
-    if (TryAgain && !Done)
-      TryAgain->push_back(getCopyRec(Inst, SrcReg, DstReg));
+    if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg))
+      continue;
+
+    unsigned repSrcReg = rep(SrcReg);
+    unsigned repDstReg = rep(DstReg);
+    bool SrcIsPhys = MRegisterInfo::isPhysicalRegister(repSrcReg);
+    bool DstIsPhys = MRegisterInfo::isPhysicalRegister(repDstReg);
+    if (NewHeuristic) {
+      JoinQueue->push(CopyRec(Inst, SrcReg, DstReg, LoopDepth,
                              isBackEdgeCopy(Inst, DstReg)));
+    } else {
+      if (SrcIsPhys || DstIsPhys)
+        PhysCopies.push_back(CopyRec(Inst, SrcReg, DstReg, 0, false));
+      else
+        VirtCopies.push_back(CopyRec(Inst, SrcReg, DstReg, 0, false));
+    }
+  }
+
+  if (NewHeuristic)
+    return;
+
+  // Try coalescing physical register + virtual register first.
+  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = PhysCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = VirtCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
   }
 }
 
 void SimpleRegisterCoalescing::joinIntervals() {
   DOUT << "********** JOINING INTERVALS ***********\n";
 
+  if (NewHeuristic)
+    JoinQueue = new JoinPriorityQueue<CopyRecSort>(this);
+
   JoinedLIs.resize(li_->getNumIntervals());
   JoinedLIs.reset();
 
   std::vector<CopyRec> TryAgainList;
-  const LoopInfo &LI = getAnalysis<LoopInfo>();
-  if (LI.begin() == LI.end()) {
+  if (loopInfo->begin() == loopInfo->end()) {
     // If there are no loops in the function, join intervals in function order.
     for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
          I != E; ++I)
-      CopyCoalesceInMBB(I, &TryAgainList);
+      CopyCoalesceInMBB(I, TryAgainList);
   } else {
     // Otherwise, join intervals in inner loops before other intervals.
     // Unfortunately we can't just iterate over loop hierarchy here because
@@ -888,31 +1141,58 @@ void SimpleRegisterCoalescing::joinIntervals() {
     // Join intervals in the function prolog first. We want to join physical
     // registers with virtual registers before the intervals get too long.
    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
-    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E; ++I)
-      MBBs.push_back(std::make_pair(LI.getLoopDepth(I->getBasicBlock()), I));
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E; ++I) {
+      MachineBasicBlock *MBB = I;
+      MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+    }
 
     // Sort by loop depth.
     std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
 
     // Finally, join intervals in loop nest order.
     for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
-      CopyCoalesceInMBB(MBBs[i].second, NULL, true);
-    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
-      CopyCoalesceInMBB(MBBs[i].second, &TryAgainList, false);
+      CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
   }
 
   // Joining intervals can allow other intervals to be joined.  Iteratively join
   // until we make no progress.
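   // (A pair rejected earlier, e.g. because of mismatched register classes,
   // may become coalescable once one side has been joined to a compatible
   // physical register.)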
-  bool ProgressMade = true;
-  while (ProgressMade) {
-    ProgressMade = false;
-
-    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
-      CopyRec &TheCopy = TryAgainList[i];
-      if (TheCopy.MI &&
-          JoinCopy(TheCopy.MI, TheCopy.SrcReg, TheCopy.DstReg)) {
-        TheCopy.MI = 0;   // Mark this one as done.
-        ProgressMade = true;
+  if (NewHeuristic) {
+    SmallVector<CopyRec, 16> TryAgain;
+    bool ProgressMade = true;
+    while (ProgressMade) {
+      ProgressMade = false;
+      while (!JoinQueue->empty()) {
+        CopyRec R = JoinQueue->pop();
+        bool Again = false;
+        bool Success = JoinCopy(R, Again);
+        if (Success)
+          ProgressMade = true;
+        else if (Again)
+          TryAgain.push_back(R);
+      }
+
+      if (ProgressMade) {
+        while (!TryAgain.empty()) {
+          JoinQueue->push(TryAgain.back());
+          TryAgain.pop_back();
+        }
+      }
+    }
+  } else {
+    bool ProgressMade = true;
+    while (ProgressMade) {
+      ProgressMade = false;
+
+      for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+        CopyRec &TheCopy = TryAgainList[i];
+        if (TheCopy.MI) {
+          bool Again = false;
+          bool Success = JoinCopy(TheCopy, Again);
+          if (Success || !Again) {
+            TheCopy.MI = 0;   // Mark this one as done.
+            ProgressMade = true;
+          }
+        }
       }
     }
   }
@@ -937,9 +1217,12 @@ void SimpleRegisterCoalescing::joinIntervals() {
     }
     RegNum = JoinedLIs.find_next(RegNum);
   }
+
+  if (NewHeuristic)
+    delete JoinQueue;
 
   DOUT << "*** Register mapping ***\n";
-  for (int i = 0, e = r2rMap_.size(); i != e; ++i)
+  for (unsigned i = 0, e = r2rMap_.size(); i != e; ++i)
     if (r2rMap_[i]) {
       DOUT << "  reg " << i << " -> ";
       DEBUG(printRegName(r2rMap_[i]));
@@ -950,7 +1233,7 @@ void SimpleRegisterCoalescing::joinIntervals() {
 /// Return true if the two specified registers belong to different register
 /// classes.  The registers may be either phys or virt regs.
 bool SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
-                                                unsigned RegB) const {
+                                                        unsigned RegB) const {
 
   // Get the register classes for the first reg.
   if (MRegisterInfo::isPhysicalRegister(RegA)) {
@@ -1072,8 +1355,13 @@ void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
 }
 
 void SimpleRegisterCoalescing::releaseMemory() {
-  r2rMap_.clear();
-  JoinedLIs.clear();
+  for (unsigned i = 0, e = r2rMap_.size(); i != e; ++i)
+    r2rRevMap_[i].clear();
+  r2rRevMap_.clear();
+  r2rMap_.clear();
+  JoinedLIs.clear();
+  SubRegIdxes.clear();
+  JoinedCopies.clear();
 }
 
 static bool isZeroLengthInterval(LiveInterval *li) {
@@ -1091,6 +1379,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
   tii_ = tm_->getInstrInfo();
   li_ = &getAnalysis<LiveIntervals>();
   lv_ = &getAnalysis<LiveVariables>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
 
   DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
        << "********** Function: "
@@ -1101,9 +1390,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
          E = mri_->regclass_end(); I != E; ++I)
     allocatableRCRegs_.insert(std::make_pair(*I,mri_->getAllocatableSet(fn, *I)));
 
-  r2rMap_.grow(mf_->getSSARegMap()->getLastVirtReg());
+  SSARegMap *RegMap = mf_->getSSARegMap();
+  r2rMap_.grow(RegMap->getLastVirtReg());
+  r2rRevMap_.grow(RegMap->getLastVirtReg());
 
   // Join (coalesce) intervals if requested.
+  IndexedMap<unsigned, VirtReg2IndexFunctor> RegSubIdxMap;
   if (EnableJoining) {
     joinIntervals();
     DOUT << "********** INTERVALS POST JOINING **********\n";
     for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
       I->second.print(DOUT, mri_);
       DOUT << "\n";
     }
+
+    // Delete all coalesced copies.
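+    // The copies were left in place during joining so that JoinedCopies
+    // could identify already-coalesced instructions; erase them now that
+    // coalescing is complete.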
+    for (SmallPtrSet<MachineInstr*, 32>::iterator I = JoinedCopies.begin(),
+           E = JoinedCopies.end(); I != E; ++I) {
+      li_->RemoveMachineInstrFromMaps(*I);
+      (*I)->eraseFromParent();
+    }
+
+    // Transfer sub-register info to SSARegMap now that coalescing information
+    // is complete.
+    RegSubIdxMap.grow(mf_->getSSARegMap()->getLastVirtReg()+1);
+    while (!SubRegIdxes.empty()) {
+      std::pair<unsigned, unsigned> RI = SubRegIdxes.back();
+      SubRegIdxes.pop_back();
+      RegSubIdxMap[RI.first] = RI.second;
+    }
   }
 
   // perform a final pass over the instructions and compute spill
   // weights, coalesce virtual registers and remove identity moves.
-  const LoopInfo &loopInfo = getAnalysis<LoopInfo>();
-
   for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
        mbbi != mbbe; ++mbbi) {
     MachineBasicBlock* mbb = mbbi;
-    unsigned loopDepth = loopInfo.getLoopDepth(mbb->getBasicBlock());
+    unsigned loopDepth = loopInfo->getLoopDepth(mbb);
 
     for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
          mii != mie; ) {
@@ -1150,16 +1456,23 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
         if (mop.isRegister() && mop.getReg() &&
             MRegisterInfo::isVirtualRegister(mop.getReg())) {
           // replace register with representative register
-          unsigned reg = rep(mop.getReg());
-          mii->getOperand(i).setReg(reg);
+          unsigned OrigReg = mop.getReg();
+          unsigned reg = rep(OrigReg);
+          unsigned SubIdx = RegSubIdxMap[OrigReg];
+          if (SubIdx && MRegisterInfo::isPhysicalRegister(reg))
+            mii->getOperand(i).setReg(mri_->getSubReg(reg, SubIdx));
+          else {
+            mii->getOperand(i).setReg(reg);
+            mii->getOperand(i).setSubReg(SubIdx);
+          }
 
           // Multiple uses of reg by the same instruction. It should not
           // contribute to spill weight again.
           if (UniqueUses.count(reg) != 0)
             continue;
 
           LiveInterval &RegInt = li_->getInterval(reg);
-          float w = (mop.isUse()+mop.isDef()) * powf(10.0F, (float)loopDepth);
-          RegInt.weight += w;
+          RegInt.weight +=
+            li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
           UniqueUses.insert(reg);
         }
       }
@@ -1176,6 +1489,20 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
       // it and hope it will be easier to allocate for this li.
       if (isZeroLengthInterval(&LI))
         LI.weight = HUGE_VALF;
+      else {
+        bool isLoad = false;
+        if (ReMatSpillWeight && li_->isReMaterializable(LI, isLoad)) {
+          // If all of the definitions of the interval are re-materializable,
+          // it is a preferred candidate for spilling. If none of the defs are
+          // loads, then it's potentially very cheap to re-materialize.
+          // FIXME: this gets much more complicated once we support non-trivial
+          // re-materialization.
+          if (isLoad)
+            LI.weight *= 0.9F;
+          else
+            LI.weight *= 0.5F;
+        }
+      }
 
       // Slightly prefer live interval that has been assigned a preferred reg.
       if (LI.preference)