From 7e073baedb8232b9519dbe15ea141ff98ccfe6ae Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 9 Apr 2008 20:57:25 +0000 Subject: [PATCH] - More aggressively coalescing away copies whose source is defined by an implicit_def. - Added insert_subreg coalescing support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 11 +- lib/CodeGen/SimpleRegisterCoalescing.cpp | 340 +++++++++++++++++----- lib/CodeGen/SimpleRegisterCoalescing.h | 22 +- test/CodeGen/X86/ins_subreg_coalesce-1.ll | 24 ++ test/CodeGen/X86/ins_subreg_coalesce-2.ll | 7 + test/CodeGen/X86/ins_subreg_coalesce-3.ll | 93 ++++++ 6 files changed, 421 insertions(+), 76 deletions(-) create mode 100644 test/CodeGen/X86/ins_subreg_coalesce-1.ll create mode 100644 test/CodeGen/X86/ins_subreg_coalesce-2.ll create mode 100644 test/CodeGen/X86/ins_subreg_coalesce-3.ll diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 91528e297d7..0556f79e48a 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -217,6 +217,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg; if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || tii_->isMoveInstr(*mi, SrcReg, DstReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); @@ -372,6 +373,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg; if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || tii_->isMoveInstr(*mi, SrcReg, DstReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); @@ -459,6 +461,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg; if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || tii_->isMoveInstr(*MI, SrcReg, DstReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg), CopyMI); @@ -594,6 +597,8 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) return VNI->copy->getOperand(1).getReg(); + if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG) + return VNI->copy->getOperand(2).getReg(); unsigned SrcReg, DstReg; if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg)) return SrcReg; @@ -949,6 +954,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, HasUse = false; HasDef = false; CanFold = false; + if (isRemoved(MI)) + break; goto RestartInstruction; } } else { @@ -1106,7 +1113,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM; // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according instruction index. + // Make sure they are sorted according to instruction index. std::vector RewriteMIs; for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), re = mri_->reg_end(); ri != re; ) { @@ -1533,7 +1540,7 @@ addIntervalsForSpills(const LiveInterval &li, } } - // Else tell the spiller to issue a spill. + // Otherwise tell the spiller to issue a spill. 
if (!Folded) { LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; bool isKill = LR->end == getStoreIndex(index); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index acbc911d6ac..2e13ff92417 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -404,7 +404,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, /// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. /// bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, - unsigned DstReg) { + unsigned DstReg) const { MachineBasicBlock *MBB = CopyMI->getParent(); const MachineLoop *L = loopInfo->getLoopFor(MBB); if (!L) @@ -471,6 +471,35 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, } } +/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining" +/// registers due to insert_subreg coalescing. e.g. +/// r1024 = op +/// r1025 = implicit_def +/// r1025 = insert_subreg r1025, r1024 +/// = op r1025 +/// => +/// r1025 = op +/// r1025 = implicit_def +/// r1025 = insert_subreg r1025, r1025 +/// = op r1025 +void +SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) { + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), + E = mri_->reg_end(); I != E; ) { + MachineOperand &O = I.getOperand(); + MachineInstr *DefMI = &*I; + ++I; + if (!O.isDef()) + continue; + if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + continue; + if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI))) + continue; + li_->RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + } +} + /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, @@ -641,6 +670,82 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, removeIntervalIfEmpty(li, li_, tri_); } +/// CanCoalesceWithImpDef - Returns true if the specified copy instruction +/// from an implicit def to another register can be coalesced away. +bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, + LiveInterval &li, + LiveInterval &ImpLi) const{ + if (!CopyMI->killsRegister(ImpLi.reg)) + return false; + unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); + if (LR == li.end()) + return false; + if (LR->valno->hasPHIKill) + return false; + if (LR->valno->def != CopyIdx) + return false; + // Make sure all of val# uses are copies. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg), + UE = mri_->use_end(); UI != UE;) { + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); + LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); + if (ULR->valno != LR->valno) + continue; + // If the use is not a use, then it's not safe to coalesce the move. + unsigned SrcReg, DstReg; + if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg)) { + if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG && + UseMI->getOperand(1).getReg() == li.reg) + continue; + return false; + } + } + return true; +} + + +/// RemoveCopiesFromValNo - The specified value# is defined by an implicit +/// def and it is being removed. Turn all copies from this value# into +/// identity copies so they will be removed. 
+void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, + VNInfo *VNI) { + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); + LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); + if (ULR->valno != VNI) + continue; + if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) + continue; + // If the use is a copy, turn it into an identity copy. + unsigned SrcReg, DstReg; + if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg) || SrcReg != li.reg) + assert(0 && "Unexpected use of implicit def!"); + UseMO.setReg(DstReg); + JoinedCopies.insert(UseMI); + } +} + +static unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo* TRI) { + for (const unsigned *SRs = TRI->getSuperRegisters(Reg); + unsigned SR = *SRs; ++SRs) + if (Reg == TRI->getSubReg(SR, SubIdx) && RC->contains(SR)) + return SR; + return 0; +} + /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently @@ -658,10 +763,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned SrcReg; unsigned DstReg; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; + bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG; unsigned SubIdx = 0; if (isExtSubReg) { DstReg = CopyMI->getOperand(0).getReg(); SrcReg = CopyMI->getOperand(1).getReg(); + } else if (isInsSubReg) { + if (CopyMI->getOperand(2).getSubReg()) { + DOUT << "\tSource of insert_subreg is already coalesced " + << "to another register.\n"; + return false; // Not coalescable. + } + DstReg = CopyMI->getOperand(0).getReg(); + SrcReg = CopyMI->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg)) { assert(0 && "Unrecognized copy instruction!"); return false; @@ -693,39 +807,46 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } unsigned RealDstReg = 0; - if (isExtSubReg) { - SubIdx = CopyMI->getOperand(2).getImm(); - if (SrcIsPhys) { + unsigned RealSrcReg = 0; + if (isExtSubReg || isInsSubReg) { + SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm(); + if (SrcIsPhys && isExtSubReg) { // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be // coalesced with AX. SrcReg = tri_->getSubReg(SrcReg, SubIdx); SubIdx = 0; - } else if (DstIsPhys) { + } else if (DstIsPhys && isInsSubReg) { + // EAX = INSERT_SUBREG EAX, r1024, 0 + DstReg = tri_->getSubReg(DstReg, SubIdx); + SubIdx = 0; + } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && isInsSubReg)) { // If this is a extract_subreg where dst is a physical register, e.g. // cl = EXTRACT_SUBREG reg1024, 1 // then create and update the actual physical register allocated to RHS. - const TargetRegisterClass *RC = mri_->getRegClass(SrcReg); - for (const unsigned *SRs = tri_->getSuperRegisters(DstReg); - unsigned SR = *SRs; ++SRs) { - if (DstReg == tri_->getSubReg(SR, SubIdx) && - RC->contains(SR)) { - RealDstReg = SR; - break; - } + // Ditto for + // reg1024 = INSERT_SUBREG r1024, cl, 1 + const TargetRegisterClass *RC = + mri_->getRegClass(isExtSubReg ? 
SrcReg : DstReg); + if (isExtSubReg) { + RealDstReg = getMatchingSuperReg(DstReg, SubIdx, RC, tri_); + assert(RealDstReg && "Invalid extra_subreg instruction!"); + } else { + RealSrcReg = getMatchingSuperReg(SrcReg, SubIdx, RC, tri_); + assert(RealSrcReg && "Invalid extra_subreg instruction!"); } - assert(RealDstReg && "Invalid extra_subreg instruction!"); // For this type of EXTRACT_SUBREG, conservatively // check if the live interval of the source register interfere with the // actual super physical register we are trying to coalesce with. - LiveInterval &RHS = li_->getInterval(SrcReg); - if (li_->hasInterval(RealDstReg) && - RHS.overlaps(li_->getInterval(RealDstReg))) { + unsigned PhysReg = isExtSubReg ? RealDstReg : RealSrcReg; + LiveInterval &RHS = li_->getInterval(isExtSubReg ? SrcReg : DstReg); + if (li_->hasInterval(PhysReg) && + RHS.overlaps(li_->getInterval(PhysReg))) { DOUT << "Interfere with register "; - DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_)); + DEBUG(li_->getInterval(PhysReg).print(DOUT, tri_)); return false; // Not coalescable } - for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) + for (const unsigned* SR = tri_->getSubRegisters(PhysReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DOUT << "Interfere with sub-register "; DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); @@ -733,17 +854,22 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } SubIdx = 0; } else { - unsigned SrcSize= li_->getInterval(SrcReg).getSize() / InstrSlots::NUM; - unsigned DstSize= li_->getInterval(DstReg).getSize() / InstrSlots::NUM; - const TargetRegisterClass *RC = mri_->getRegClass(DstReg); + unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; + unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; + unsigned LargeRegSize = + li_->getInterval(LargeReg).getSize() / InstrSlots::NUM; + unsigned SmallRegSize = + li_->getInterval(SmallReg).getSize() / InstrSlots::NUM; + const TargetRegisterClass *RC = mri_->getRegClass(SmallReg); unsigned Threshold = allocatableRCRegs_[RC].count(); // Be conservative. If both sides are virtual registers, do not coalesce // if this will cause a high use density interval to target a smaller set // of registers. - if (DstSize > Threshold || SrcSize > Threshold) { - LiveVariables::VarInfo &svi = lv_->getVarInfo(SrcReg); - LiveVariables::VarInfo &dvi = lv_->getVarInfo(DstReg); - if ((float)dvi.NumUses / DstSize < (float)svi.NumUses / SrcSize) { + if (SmallRegSize > Threshold || LargeRegSize > Threshold) { + LiveVariables::VarInfo &svi = lv_->getVarInfo(LargeReg); + LiveVariables::VarInfo &dvi = lv_->getVarInfo(SmallReg); + if ((float)dvi.NumUses / SmallRegSize < + (float)svi.NumUses / LargeRegSize) { Again = true; // May be possible to coalesce later. return false; } @@ -777,34 +903,36 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DOUT << ": "; // Check if it is necessary to propagate "isDead" property. - MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); - bool isDead = mopd->isDead(); - - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! - if (!isDead && (SrcIsPhys || DstIsPhys) && !isExtSubReg) { - LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; - unsigned JoinVReg = SrcIsPhys ? 
DstReg : SrcReg; - unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; - const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - if (TheCopy.isBackEdge) - Threshold *= 2; // Favors back edge copies. - - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. - unsigned Length = JoinVInt.getSize() / InstrSlots::NUM; - LiveVariables::VarInfo &vi = lv_->getVarInfo(JoinVReg); - if (Length > Threshold && - (((float)vi.NumUses / Length) < (1.0 / Threshold))) { - JoinVInt.preference = JoinPReg; - ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; - Again = true; // May be possible to coalesce later. - return false; + if (!isExtSubReg && !isInsSubReg) { + MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); + bool isDead = mopd->isDead(); + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + if (!isDead && (SrcIsPhys || DstIsPhys)) { + LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; + unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; + unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; + const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + if (TheCopy.isBackEdge) + Threshold *= 2; // Favors back edge copies. + + // If the virtual register live interval is long but it has low use desity, + // do not join them, instead mark the physical register as its allocation + // preference. + unsigned Length = JoinVInt.getSize() / InstrSlots::NUM; + LiveVariables::VarInfo &vi = lv_->getVarInfo(JoinVReg); + if (Length > Threshold && + (((float)vi.NumUses / Length) < (1.0 / Threshold))) { + JoinVInt.preference = JoinPReg; + ++numAborts; + DOUT << "\tMay tie down a physical register, abort!\n"; + Again = true; // May be possible to coalesce later. + return false; + } } } @@ -815,9 +943,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { bool Swapped = false; // If SrcInt is implicitly defined, it's safe to coalesce. bool isEmpty = SrcInt.empty(); - if (isEmpty && DstInt.getNumValNums() != 1) { + if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { // Only coalesce an empty interval (defined by implicit_def) with - // another interval that's defined by a single copy. + // another interval which has a valno defined by the CopyMI and the CopyMI + // is a kill of the implicit def. DOUT << "Not profitable!\n"; return false; } @@ -826,7 +955,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Coalescing failed. // If we can eliminate the copy without merging the live ranges, do so now. - if (!isExtSubReg && + if (!isExtSubReg && !isInsSubReg && (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); @@ -855,8 +984,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If this is a extract_subreg where dst is a physical register, e.g. // cl = EXTRACT_SUBREG reg1024, 1 // then create and update the actual physical register allocated to RHS. 
- if (RealDstReg) { - LiveInterval &RealDstInt = li_->getOrCreateInterval(RealDstReg); + if (RealDstReg || RealSrcReg) { + LiveInterval &RealInt = + li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg); SmallSet CopiedValNos; for (LiveInterval::Ranges::const_iterator I = ResSrcInt->ranges.begin(), E = ResSrcInt->ranges.end(); I != E; ++I) { @@ -865,14 +995,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { assert(DstLR != ResDstInt->end() && "Invalid joined interval!"); const VNInfo *DstValNo = DstLR->valno; if (CopiedValNos.insert(DstValNo)) { - VNInfo *ValNo = RealDstInt.getNextValue(DstValNo->def, DstValNo->copy, - li_->getVNInfoAllocator()); + VNInfo *ValNo = RealInt.getNextValue(DstValNo->def, DstValNo->copy, + li_->getVNInfoAllocator()); ValNo->hasPHIKill = DstValNo->hasPHIKill; - RealDstInt.addKills(ValNo, DstValNo->kills); - RealDstInt.MergeValueInAsValue(*ResDstInt, DstValNo, ValNo); + RealInt.addKills(ValNo, DstValNo->kills); + RealInt.MergeValueInAsValue(*ResDstInt, DstValNo, ValNo); } } - DstReg = RealDstReg; + + DstReg = RealDstReg ? RealDstReg : RealSrcReg; } // Update the liveintervals of sub-registers. @@ -888,8 +1019,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the // larger super-register. - if (isExtSubReg && !SrcIsPhys && !DstIsPhys) { - if (!Swapped) { + if ((isExtSubReg || isInsSubReg) && !SrcIsPhys && !DstIsPhys) { + if ((isExtSubReg && !Swapped) || (isInsSubReg && Swapped)) { ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator()); std::swap(SrcReg, DstReg); std::swap(ResSrcInt, ResDstInt); @@ -927,6 +1058,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // SrcReg is guarateed to be the register whose live interval that is // being merged. li_->removeInterval(SrcReg); + if (isInsSubReg) + // Avoid: + // r1024 = op + // r1024 = implicit_def + // ... + // = r1024 + RemoveDeadImpDef(DstReg, *ResDstInt); UpdateRegDefsUses(SrcReg, DstReg, SubIdx); if (isEmpty) { @@ -937,7 +1075,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { LiveInterval::iterator LR = ResDstInt->FindLiveRangeContaining(CopyIdx); VNInfo *ImpVal = LR->valno; assert(ImpVal->def == CopyIdx); + unsigned NextDef = LR->end; + RemoveCopiesFromValNo(*ResDstInt, ImpVal); ResDstInt->removeValNo(ImpVal); + LR = ResDstInt->FindLiveRangeContaining(NextDef); + if (LR != ResDstInt->end() && LR->valno->def == NextDef) { + // Special case: vr1024 = implicit_def + // vr1024 = insert_subreg vr1024, vr1025, c + // The insert_subreg becomes a "copy" that defines a val# which can itself + // be coalesced away. + MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef); + if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) + LR->valno->copy = DefMI; + } } DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_); @@ -1002,6 +1152,33 @@ static bool InVector(VNInfo *Val, const SmallVector &V) { return std::find(V.begin(), V.end(), Val) != V.end(); } +/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of +/// the specified live interval is defined by a copy from the specified +/// register. 
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, + LiveRange *LR, + unsigned Reg) { + unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); + if (SrcReg == Reg) + return true; + if (LR->valno->def == ~0U && + TargetRegisterInfo::isPhysicalRegister(li.reg) && + *tri_->getSuperRegisters(li.reg)) { + // It's a sub-register live interval, we may not have precise information. + // Re-compute it. + MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start); + unsigned SrcReg, DstReg; + if (tii_->isMoveInstr(*DefMI, SrcReg, DstReg) && + DstReg == li.reg && SrcReg == Reg) { + // Cache computed info. + LR->valno->def = LR->start; + LR->valno->copy = DefMI; + return true; + } + } + return false; +} + /// SimpleJoin - Attempt to joint the specified interval into this one. The /// caller of this method must guarantee that the RHS only contains a single /// value number and that the RHS is not defined by a copy from this @@ -1045,8 +1222,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // If we haven't already recorded that this value # is safe, check it. if (!InVector(LHSIt->valno, EliminatedLHSVals)) { // Copy from the RHS? - unsigned SrcReg = li_->getVNInfoSourceReg(LHSIt->valno); - if (SrcReg != RHS.reg) + if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) return false; // Nope, bail out. EliminatedLHSVals.push_back(LHSIt->valno); @@ -1073,7 +1249,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ } else { // Otherwise, if this is a copy from the RHS, mark it as being merged // in. - if (li_->getVNInfoSourceReg(LHSIt->valno) == RHS.reg) { + if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { EliminatedLHSVals.push_back(LHSIt->valno); // We know this entire LHS live range is okay, so skip it now. @@ -1108,8 +1284,13 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ } } LHSValNo = Smallest; + } else if (EliminatedLHSVals.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && + *tri_->getSuperRegisters(LHS.reg)) + // Imprecise sub-register information. Can't handle it. 
+ return false; + assert(0 && "No copies from the RHS?"); } else { - assert(!EliminatedLHSVals.empty() && "No copies from the RHS?"); LHSValNo = EliminatedLHSVals[0]; } @@ -1422,6 +1603,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector VirtCopies; std::vector PhysCopies; + std::vector ImpDefCopies; unsigned LoopDepth = loopInfo->getLoopDepth(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { @@ -1432,6 +1614,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue; @@ -1440,7 +1625,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, if (NewHeuristic) { JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg))); } else { - if (SrcIsPhys || DstIsPhys) + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(CopyRec(Inst, 0, false)); + else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(CopyRec(Inst, 0, false)); else VirtCopies.push_back(CopyRec(Inst, 0, false)); @@ -1450,7 +1637,16 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, if (NewHeuristic) return; - // Try coalescing physical register + virtual register first. + // Try coalescing implicit copies first, followed by copies to / from + // physical registers, then finally copies from virtual registers to + // virtual registers. + for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { + CopyRec &TheCopy = ImpDefCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { CopyRec &TheCopy = PhysCopies[i]; bool Again = false; diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 453760a7e7b..7823ba0df4b 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -196,9 +196,25 @@ namespace llvm { MachineBasicBlock *MBB, unsigned DstReg, unsigned SrcReg); - /// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. + /// CanCoalesceWithImpDef - Returns true if the specified copy instruction + /// from an implicit def to another register can be coalesced away. + bool CanCoalesceWithImpDef(MachineInstr *CopyMI, + LiveInterval &li, LiveInterval &ImpLi) const; + + /// RemoveCopiesFromValNo - The specified value# is defined by an implicit + /// def and it is being removed. Turn all copies from this value# into + /// identity copies so they will be removed. + void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI); + + /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of + /// the specified live interval is defined by a copy from the specified + /// register. + bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, + unsigned Reg); + + /// isBackEdgeCopy - Return true if CopyMI is a back edge copy. /// - bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg); + bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const; /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. 
If DstReg is a @@ -207,6 +223,8 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI); + /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll new file mode 100644 index 00000000000..863cda94c5f --- /dev/null +++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2 + +define fastcc i32 @sqlite3ExprResolveNames() nounwind { +entry: + br i1 false, label %UnifiedReturnBlock, label %bb4 +bb4: ; preds = %entry + br i1 false, label %bb17, label %bb22 +bb17: ; preds = %bb4 + ret i32 1 +bb22: ; preds = %bb4 + br i1 true, label %walkExprTree.exit, label %bb4.i +bb4.i: ; preds = %bb22 + ret i32 0 +walkExprTree.exit: ; preds = %bb22 + %tmp83 = load i16* null, align 4 ; [#uses=1] + %tmp84 = or i16 %tmp83, 2 ; [#uses=2] + store i16 %tmp84, i16* null, align 4 + %tmp98993 = zext i16 %tmp84 to i32 ; [#uses=1] + %tmp1004 = lshr i32 %tmp98993, 3 ; [#uses=1] + %tmp100.lobit5 = and i32 %tmp1004, 1 ; [#uses=1] + ret i32 %tmp100.lobit5 +UnifiedReturnBlock: ; preds = %entry + ret i32 0 +} diff --git a/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/test/CodeGen/X86/ins_subreg_coalesce-2.ll new file mode 100644 index 00000000000..5c0b0d3d3e9 --- /dev/null +++ b/test/CodeGen/X86/ins_subreg_coalesce-2.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | not grep movw + +define i16 @test5(i16 %f12) nounwind { + %f11 = shl i16 %f12, 2 ; [#uses=1] + %tmp7.25 = ashr i16 %f11, 8 ; [#uses=1] + ret i16 %tmp7.25 +} diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll new file mode 100644 index 00000000000..ead920a482a --- /dev/null +++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -0,0 +1,93 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 9 + + %struct.COMPOSITE = type { i8, i16, i16 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { { i16, i8, i8 } } + %struct.STYLE = type { { %struct.GAP }, { %struct.GAP }, i16, i16, i32 } + %struct.THIRD_UNION = type { %struct.FILE*, [8 x i8] } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.metrics = type { i16, i16, i16, i16, i16 } + %struct.rec = type { %struct.head_type } + +define void @FontChange() { +entry: + br i1 false, label %bb298, label %bb49 +bb49: ; preds = %entry + ret void +bb298: ; 
preds = %entry + br i1 false, label %bb304, label %bb366 +bb304: ; preds = %bb298 + br i1 false, label %bb330, label %bb428 +bb330: ; preds = %bb366, %bb304 + br label %bb366 +bb366: ; preds = %bb330, %bb298 + br i1 false, label %bb330, label %bb428 +bb428: ; preds = %bb366, %bb304 + br i1 false, label %bb650, label %bb433 +bb433: ; preds = %bb428 + ret void +bb650: ; preds = %bb650, %bb428 + %tmp658 = load i8* null, align 8 ; [#uses=1] + %tmp659 = icmp eq i8 %tmp658, 0 ; [#uses=1] + br i1 %tmp659, label %bb650, label %bb662 +bb662: ; preds = %bb650 + %tmp685 = icmp eq %struct.rec* null, null ; [#uses=1] + br i1 %tmp685, label %bb761, label %bb688 +bb688: ; preds = %bb662 + ret void +bb761: ; preds = %bb662 + %tmp487248736542 = load i32* null, align 4 ; [#uses=2] + %tmp487648776541 = and i32 %tmp487248736542, 57344 ; [#uses=1] + %tmp4881 = icmp eq i32 %tmp487648776541, 8192 ; [#uses=1] + br i1 %tmp4881, label %bb4884, label %bb4897 +bb4884: ; preds = %bb761 + %tmp488948906540 = and i32 %tmp487248736542, 7168 ; [#uses=1] + %tmp4894 = icmp eq i32 %tmp488948906540, 1024 ; [#uses=1] + br i1 %tmp4894, label %bb4932, label %bb4897 +bb4897: ; preds = %bb4884, %bb761 + ret void +bb4932: ; preds = %bb4884 + %tmp4933 = load i32* null, align 4 ; [#uses=1] + br i1 false, label %bb5054, label %bb4940 +bb4940: ; preds = %bb4932 + %tmp4943 = load i32* null, align 4 ; [#uses=2] + switch i32 %tmp4933, label %bb5054 [ + i32 159, label %bb4970 + i32 160, label %bb5002 + ] +bb4970: ; preds = %bb4940 + %tmp49746536 = trunc i32 %tmp4943 to i16 ; [#uses=1] + %tmp49764977 = and i16 %tmp49746536, 4095 ; [#uses=1] + %mask498049814982 = zext i16 %tmp49764977 to i64 ; [#uses=1] + %tmp4984 = getelementptr %struct.FONT_INFO* null, i64 %mask498049814982, i32 5 ; <%struct.rec**> [#uses=1] + %tmp4985 = load %struct.rec** %tmp4984, align 8 ; <%struct.rec*> [#uses=1] + %tmp4988 = getelementptr %struct.rec* %tmp4985, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1] + %tmp4991 = bitcast %struct.THIRD_UNION* %tmp4988 to i32* ; [#uses=1] + %tmp4992 = load i32* %tmp4991, align 8 ; [#uses=1] + %tmp49924993 = trunc i32 %tmp4992 to i16 ; [#uses=1] + %tmp4996 = add i16 %tmp49924993, 0 ; [#uses=1] + br label %bb5054 +bb5002: ; preds = %bb4940 + %tmp50066537 = trunc i32 %tmp4943 to i16 ; [#uses=1] + %tmp50085009 = and i16 %tmp50066537, 4095 ; [#uses=1] + %mask501250135014 = zext i16 %tmp50085009 to i64 ; [#uses=1] + %tmp5016 = getelementptr %struct.FONT_INFO* null, i64 %mask501250135014, i32 5 ; <%struct.rec**> [#uses=1] + %tmp5017 = load %struct.rec** %tmp5016, align 8 ; <%struct.rec*> [#uses=1] + %tmp5020 = getelementptr %struct.rec* %tmp5017, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1] + %tmp5023 = bitcast %struct.THIRD_UNION* %tmp5020 to i32* ; [#uses=1] + %tmp5024 = load i32* %tmp5023, align 8 ; [#uses=1] + %tmp50245025 = trunc i32 %tmp5024 to i16 ; [#uses=1] + %tmp5028 = sub i16 %tmp50245025, 0 ; [#uses=1] + br label %bb5054 +bb5054: ; preds = %bb5002, %bb4970, %bb4940, %bb4932 + %flen.0.reg2mem.0 = phi i16 [ %tmp4996, %bb4970 ], [ %tmp5028, %bb5002 ], [ 0, %bb4932 ], [ undef, %bb4940 ] ; [#uses=0] + ret void +} -- 2.34.1
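
To make the dead-implicit_def cleanup added by this patch concrete, here is a minimal, self-contained C++ sketch of the same idea. It does not use LLVM's API; ToyInst, isLiveBeforeAndAt and removeDeadImpDefs are invented names for illustration only. The point it demonstrates is the one RemoveDeadImpDef addresses: after an INSERT_SUBREG has been coalesced, an IMPLICIT_DEF can end up "re-defining" a register that is already live across it, and such an instruction can simply be deleted.

#include <cstdio>
#include <string>
#include <vector>

struct ToyInst {
  std::string Opcode;          // "op", "implicit_def", "insert_subreg", "use"
  int DefReg;                  // register defined by this instruction, -1 if none
  std::vector<int> UseRegs;    // registers read by this instruction
};

static bool isLiveBeforeAndAt(const std::vector<ToyInst> &MIs, int Reg, size_t Idx) {
  // Toy stand-in for a liveBeforeAndAt query: Reg has an earlier def and a use
  // at or after Idx, so it is live across the instruction at Idx.
  bool DefinedEarlier = false;
  for (size_t i = 0; i < Idx; ++i)
    if (MIs[i].DefReg == Reg)
      DefinedEarlier = true;
  if (!DefinedEarlier)
    return false;
  for (size_t i = Idx; i < MIs.size(); ++i)
    for (int U : MIs[i].UseRegs)
      if (U == Reg)
        return true;
  return false;
}

static void removeDeadImpDefs(std::vector<ToyInst> &MIs) {
  // Drop implicit_def instructions that merely re-define an already-live register.
  std::vector<ToyInst> Out;
  for (size_t i = 0; i < MIs.size(); ++i) {
    if (MIs[i].Opcode == "implicit_def" && isLiveBeforeAndAt(MIs, MIs[i].DefReg, i))
      continue;
    Out.push_back(MIs[i]);
  }
  MIs.swap(Out);
}

int main() {
  // After coalescing r1024 into r1025 the instruction stream looks like:
  //   r1025 = op
  //   r1025 = implicit_def               <- now a dead re-definition
  //   r1025 = insert_subreg r1025, r1025
  //         = use r1025
  std::vector<ToyInst> MIs = {
      {"op", 1025, {}},
      {"implicit_def", 1025, {}},
      {"insert_subreg", 1025, {1025, 1025}},
      {"use", -1, {1025}},
  };
  removeDeadImpDefs(MIs);
  for (const ToyInst &MI : MIs)
    std::printf("%s\n", MI.Opcode.c_str()); // "implicit_def" is no longer printed
  return 0;
}

The real pass works on LiveIntervals and MachineInstrs rather than this toy list, but the removal condition (the defined register is already live immediately before the IMPLICIT_DEF) has the same shape as the liveBeforeAndAt check sketched above.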