X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegisterCoalescer.cpp;h=c1ff13ec7ca064a069fa6b12872c05bfe256cb50;hb=HEAD;hp=70d2e0e18ce3be67e1e7af301e493f87ab453eed;hpb=1458e0596703f85d6a6a8041c05ad39c297b3190;p=oota-llvm.git diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 70d2e0e18ce..c1ff13ec7ca 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -58,6 +57,10 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); +static cl::opt UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(false), cl::Hidden); + /// Temporary flag to test critical edge unsplitting. static cl::opt EnableJoinSplits("join-splitedges", @@ -89,7 +92,7 @@ namespace { /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. - unsigned ShrinkMask; + LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. @@ -161,12 +164,12 @@ namespace { /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP); + LaneBitmask LaneMask, CoalescerPair &CP); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, const CoalescerPair &CP); + LaneBitmask LaneMask, const CoalescerPair &CP); /// We found a non-trivially-coalescable copy. If the source value number is /// defined by a copy from the destination reg see if we can merge these two @@ -188,7 +191,7 @@ namespace { /// If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, + bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy); /// Return true if a copy involving a physreg should be joined. @@ -204,6 +207,33 @@ namespace { /// Returns true if @p CopyMI was a copy of an undef value and eliminated. bool eliminateUndefCopy(MachineInstr *CopyMI); + /// Check whether or not we should apply the terminal rule on the + /// destination (Dst) of \p Copy. + /// When the terminal rule applies, Copy is not profitable to + /// coalesce. + /// Dst is terminal if it has exactly one affinity (Dst, Src) and + /// at least one interference (Dst, Dst2). If Dst is terminal, the + /// terminal rule consists in checking that at least one of + /// interfering node, say Dst2, has an affinity of equal or greater + /// weight with Src. + /// In that case, Dst2 and Dst will not be able to be both coalesced + /// with Src. Since Dst2 exposes more coalescing opportunities than + /// Dst, we can drop \p Copy. + bool applyTerminalRule(const MachineInstr &Copy) const; + + /// Wrapper method for \see LiveIntervals::shrinkToUses. + /// This method does the proper fixing of the live-ranges when the afore + /// mentioned method returns true. 
+ void shrinkToUses(LiveInterval *LI, + SmallVectorImpl *Dead = nullptr) { + if (LIS->shrinkToUses(LI, Dead)) { + /// Check whether or not \p LI is composed by multiple connected + /// components and if that is the case, fix that. + SmallVector SplitLIs; + LIS->splitSeparateComponents(*LI, SplitLIs); + } + } + public: static char ID; ///< Class identification, replacement for typeinfo RegisterCoalescer() : MachineFunctionPass(ID) { @@ -229,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -407,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -536,7 +566,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // will also add the isKill marker. CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); if (AS->end == CopyIdx) - LIS->shrinkToUses(&IntA); + shrinkToUses(&IntA); ++numExtends; return true; @@ -633,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) return false; - unsigned Op1, Op2, NewDstIdx; - if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) - return false; - if (Op1 == UseOpIdx) - NewDstIdx = Op2; - else if (Op2 == UseOpIdx) - NewDstIdx = Op1; - else + + // FIXME: The code below tries to commute 'UseOpIdx' operand with some other + // commutable operand which is expressed by 'CommuteAnyOperandIndex'value + // passed to the method. That _other_ operand is chosen by + // the findCommutedOpIndices() method. + // + // That is obviously an area for improvement in case of instructions having + // more than 2 operands. For example, if some instruction has 3 commutable + // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, + // op#2<->op#3) of commute transformation should be considered/tried here. + unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -673,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. 
MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = TII->commuteInstruction(DefMI); + MachineInstr *NewMI = + TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && @@ -738,15 +773,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); for (LiveInterval::SubRange &S : IntB.subranges()) { VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); if (!SubDVNI) continue; VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); assert(SubBValNo->def == CopyIdx); - VNInfo *Merged = S.MergeValueNumberInto(SubBValNo, SubDVNI); - Merged->def = CopyIdx; + S.MergeValueNumberInto(SubDVNI, SubBValNo); } ErasedInstrs.insert(UseMI); @@ -759,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -767,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); - unsigned AMask = SA.LaneMask; + LaneBitmask AMask = SA.LaneMask; for (LiveInterval::SubRange &SB : IntB.subranges()) { - unsigned BMask = SB.LaneMask; - unsigned Common = BMask & AMask; + LaneBitmask BMask = SB.LaneMask; + LaneBitmask Common = BMask & AMask; if (Common == 0) continue; - DEBUG( - dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common)); - unsigned BRest = BMask & ~AMask; + DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) + << " into " << PrintLaneMask(Common) << '\n'); + LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; if (BRest != 0) { SB.LaneMask = BRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) + << '\n'); // Duplicate SubRange for newly merged common stuff. 
CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); } else { @@ -797,12 +832,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, AMask &= ~BMask; } if (AMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); } - SA.removeValNo(ASubValNo); } } @@ -810,23 +844,30 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - IntA.removeValNo(AValNo); - // Remove valuenos in subranges (the A+B have subranges case has already been - // handled above) - if (!IntB.hasSubRanges()) { - SlotIndex AIdx = CopyIdx.getRegSlot(true); - for (LiveInterval::SubRange &SA : IntA.subranges()) { - VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); - assert(ASubValNo != nullptr); - SA.removeValNo(ASubValNo); - } - } + LIS->removeVRegDefAt(IntA, AValNo->def); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } -bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, +/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just +/// defining a subregister. +static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + // Return true if we define the full register or don't care about the value + // inside other subregisters. 
+ if (Op.getSubReg() == 0 || Op.isUndef()) + return true; + } + return false; +} + +bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy) { IsDefCopy = false; @@ -854,8 +895,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; + if (!definesFullReg(*DefMI, SrcReg)) + return false; bool SawStore = false; - if (!DefMI->isSafeToMove(TII, AA, SawStore)) + if (!DefMI->isSafeToMove(AA, SawStore)) return false; const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) @@ -902,6 +945,28 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = std::prev(MII); + // In a situation like the following: + // %vreg0:subreg = instr ; DefMI, subreg = DstIdx + // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %vreg1 to the register class of %vreg0 simply do: + // %vreg1 = instr + const TargetRegisterClass *NewRC = CP.getNewRC(); + if (DstIdx != 0) { + MachineOperand &DefMO = NewMI->getOperand(0); + if (DefMO.getSubReg() == DstIdx) { + assert(SrcIdx == 0 && CP.isFlipped() + && "Shouldn't have SrcIdx+DstIdx at this point"); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + const TargetRegisterClass *CommonRC = + TRI->getCommonSubClass(DefRC, DstRC); + if (CommonRC != nullptr) { + NewRC = CommonRC; + DstIdx = 0; + DefMO.setSubReg(0); + } + } + } + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ErasedInstrs.insert(CopyMI); @@ -913,23 +978,23 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, for (unsigned i = NewMI->getDesc().getNumOperands(), e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); - if (MO.isReg()) { - assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + if (MO.isReg() && MO.isDef()) { + assert(MO.isImplicit() && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - if (NewIdx) - NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); - else - NewRC = TRI->getCommonSubClass(NewRC, DefRC); - - assert(NewRC && "subreg chosen for remat incompatible with instruction"); + if (DefRC != nullptr) { + if (NewIdx) + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + } MRI->setRegClass(DstReg, NewRC); updateRegDefsUses(DstReg, DstReg, DstIdx); @@ -996,7 +1061,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, ++NumReMats; // The source interval can become smaller because we removed a use. - LIS->shrinkToUses(&SrcInt, &DeadDefs); + shrinkToUses(&SrcInt, &DeadDefs); if (!DeadDefs.empty()) { // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. 
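Illustrative aside (not part of the original patch): the subrange-merging hunks in this change (removeCopyByCommutingDef above, mergeSubRangeInto further down) repeatedly partition an existing subrange's lane mask against an incoming mask: the overlapping lanes get merged, the remaining lanes stay in the (reduced) old subrange, and any incoming lanes covered by no subrange start a new one. A minimal standalone sketch of that bookkeeping, using a plain unsigned in place of LaneBitmask and hypothetical names:

#include <cstdio>

// Toy stand-in for LaneBitmask (in this patch it is an unsigned-integer
// typedef as well).
typedef unsigned ToyLaneMask;

// Partition the lanes of an existing subrange (RMask) against an incoming
// range covering LaneMask, mirroring the "Copy+Merge", "Reduce Lane" and
// "New Lane" steps seen in the DEBUG output of the patch.
static void mergeLaneMasks(ToyLaneMask RMask, ToyLaneMask LaneMask) {
  ToyLaneMask Common = RMask & LaneMask;    // lanes shared with the incoming range
  if (Common != 0) {
    std::printf("Copy+Merge %04X into %04X\n", RMask, Common);
    ToyLaneMask Rest = RMask & ~LaneMask;   // lanes the old subrange keeps for itself
    if (Rest != 0)
      std::printf("Reduce Lane to %04X\n", Rest);
  }
  ToyLaneMask Leftover = LaneMask & ~RMask; // lanes not covered by this subrange
  if (Leftover != 0)
    std::printf("New Lane %04X\n", Leftover);
}

int main() {
  mergeLaneMasks(0x0003, 0x0006); // overlap on lane 0x0002 only
  return 0;
}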
@@ -1013,13 +1078,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return true; } -static void removeUndefValue(LiveRange &LR, SlotIndex At) -{ - VNInfo *VNInfo = LR.getVNInfoAt(At); - assert(VNInfo != nullptr && SlotIndex::isSameInstr(VNInfo->def, At)); - LR.removeValNo(VNInfo); -} - bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // ProcessImpicitDefs may leave some copies of values, it only removes // local variables. When we have a copy like: @@ -1039,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { - unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { if ((SR.LaneMask & SrcMask) == 0) continue; @@ -1053,22 +1111,25 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // Remove any DstReg segments starting at the instruction. LiveInterval &DstLI = LIS->getInterval(DstReg); - unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); SlotIndex RegIndex = Idx.getRegSlot(); - for (LiveInterval::SubRange &SR : DstLI.subranges()) { - if ((SR.LaneMask & DstMask) == 0) - continue; - removeUndefValue(SR, RegIndex); - - DstLI.removeEmptySubRanges(); - } // Remove value or merge with previous one in case of a subregister def. if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { - VNInfo *VNInfo = DstLI.getVNInfoAt(RegIndex); - DstLI.MergeValueNumberInto(VNInfo, PrevVNI); - } else { - removeUndefValue(DstLI, RegIndex); - } + VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); + DstLI.MergeValueNumberInto(VNI, PrevVNI); + + // The affected subregister segments can be removed. + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + for (LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & DstMask) == 0) + continue; + + VNInfo *SVNI = SR.getVNInfoAt(RegIndex); + assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); + SR.removeValNo(SVNI); + } + DstLI.removeEmptySubRanges(); + } else + LIS->removeVRegDefAt(DstLI, RegIndex); // Mark uses as undef. for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { @@ -1076,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { continue; const MachineInstr &MI = *MO.getParent(); SlotIndex UseIdx = LIS->getInstructionIndex(&MI); - unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; if (UseMask != ~0u && DstLI.hasSubRanges()) { isLive = false; @@ -1139,13 +1200,13 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. - if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); } - unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); bool IsUndef = true; SlotIndex MIIdx = UseMI->isDebugValue() ? 
LIS->getSlotIndexes()->getIndexBefore(UseMI) @@ -1374,14 +1435,15 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & ShrinkMask) == 0) continue; - DEBUG(dbgs() << "Shrink LaneUses (Lane " - << format("%04X", S.LaneMask) << ")\n"); + DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); LIS->shrinkToUses(S, LI.reg); } + LI.removeEmptySubRanges(); } if (ShrinkMainRange) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); - LIS->shrinkToUses(&LI); + shrinkToUses(&LI); } // SrcReg is guaranteed to be the register whose live interval that is @@ -1389,7 +1451,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { LIS->removeInterval(CP.getSrcReg()); // Update regalloc hint. - TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) @@ -1459,6 +1521,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { DEBUG(dbgs() << "\t\tInterference (read): " << *MI); return false; } + + // We must also check for clobbers caused by regmasks. + for (const auto &MO : MI->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { + DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + return false; + } + } } // We're going to remove the copy which defines a physical reserved @@ -1564,7 +1634,7 @@ class JoinVals { const unsigned SubIdx; /// The LaneMask that this liverange will occupy the coalesced register. May /// be smaller than the lanemask produced by SubIdx when merging subranges. - const unsigned LaneMask; + const LaneBitmask LaneMask; /// This is true when joining sub register ranges, false when joining main /// ranges. @@ -1619,11 +1689,11 @@ class JoinVals { ConflictResolution Resolution; /// Lanes written by this def, 0 for unanalyzed values. - unsigned WriteLanes; + LaneBitmask WriteLanes; /// Lanes with defined values in this register. Other lanes are undef and /// safe to clobber. - unsigned ValidLanes; + LaneBitmask ValidLanes; /// Value in LI being redefined by this def. VNInfo *RedefVNI; @@ -1664,7 +1734,7 @@ class JoinVals { /// Compute the bitmask of lanes actually written by DefMI. /// Set Redef if there are any partial register definitions that depend on the /// previous value of the register. - unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. std::pair followCopyChain(const VNInfo *VNI) const; @@ -1700,12 +1770,12 @@ class JoinVals { /// entry to TaintedVals. /// /// Returns false if the tainted lanes extend beyond the basic block. - bool taintExtent(unsigned, unsigned, JoinVals&, - SmallVectorImpl >&); + bool taintExtent(unsigned, LaneBitmask, JoinVals&, + SmallVectorImpl >&); /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. 
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const; + bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const; /// Determine if ValNo is a copy of a value number in LR or Other.LR that will /// be pruned: @@ -1716,7 +1786,7 @@ class JoinVals { bool isPrunedValue(unsigned ValNo, JoinVals &Other); public: - JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask, + JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, SmallVectorImpl &newVNInfo, const CoalescerPair &cp, LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, bool TrackSubRegLiveness) @@ -1742,8 +1812,8 @@ public: /// Removes subranges starting at copies that get removed. This sometimes /// happens when undefined subranges are copied around. These ranges contain - /// no usefull information and can be removed. - void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask); + /// no useful information and can be removed. + void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); /// Erase any machine instructions that have been coalesced away. /// Add erased instructions to ErasedInstrs. @@ -1752,20 +1822,23 @@ public: void eraseInstrs(SmallPtrSetImpl &ErasedInstrs, SmallVectorImpl &ShrinkRegs); + /// Remove liverange defs at places where implicit defs will be removed. + void removeImplicitDefs(); + /// Get the value assignments suitable for passing to LiveInterval::join. const int *getAssignments() const { return Assignments.data(); } }; } // end anonymous namespace -unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) +LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const { - unsigned L = 0; - for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef()) + LaneBitmask L = 0; + for (const MachineOperand &MO : DefMI->operands()) { + if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) continue; L |= TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); - if (MO->readsReg()) + TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); + if (MO.readsReg()) Redef = true; } return L; @@ -1796,7 +1869,7 @@ std::pair JoinVals::followCopyChain( ValueIn = nullptr; for (const LiveInterval::SubRange &S : LI.subranges()) { // Transform lanemask to a mask in the joined live interval. - unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); + LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); if ((SMask & LaneMask) == 0) continue; LiveQueryResult LRQ = S.Query(Def); @@ -1845,14 +1918,18 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); assert(DefMI != nullptr); if (SubRangeJoin) { // We don't care about the lanes when joining subregister ranges. 
- V.ValidLanes = V.WriteLanes = 1; + V.WriteLanes = V.ValidLanes = 1; + if (DefMI->isImplicitDef()) { + V.ValidLanes = 0; + V.ErasableImplicitDef = true; + } } else { bool Redef = false; V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); @@ -2103,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) { } bool JoinVals:: -taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, - SmallVectorImpl > &TaintExtent) { +taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl > &TaintExtent) { VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); @@ -2143,16 +2220,16 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, } bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) const { + LaneBitmask Lanes) const { if (MI->isDebugValue()) return false; - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) continue; - if (!MO->readsReg()) + if (!MO.readsReg()) continue; if (Lanes & TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) + TRI->composeSubRegIndices(SubIdx, MO.getSubReg()))) return true; } return false; @@ -2177,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the // join, those lanes will be tainted with a wrong value. Get the extent of // the tainted lanes. - unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; - SmallVector, 8> TaintExtent; + LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector, 8> TaintExtent; if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) // Tainted lanes would extend beyond the basic block. return false; @@ -2261,11 +2338,11 @@ void JoinVals::pruneValues(JoinVals &Other, // Remove flags. This def is now a partial redef. // Also remove flags since the joined live range will // continue past this instruction. - for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); - MO.isValid(); ++MO) { - if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) { - MO->setIsUndef(EraseImpDef); - MO->setIsDead(false); + for (MachineOperand &MO : + Indexes->getInstructionFromIndex(Def)->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + MO.setIsUndef(EraseImpDef); + MO.setIsDead(false); } } } @@ -2297,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; @@ -2314,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); if (ValueOut != nullptr && Q.valueIn() == nullptr) { - DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask) + DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); LIS->pruneValue(S, Def, nullptr); DidPrune = true; @@ -2323,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) continue; } // If a subrange ends at the copy, then a value was copied but only - // partially used later. 
Shrink the subregister range apropriately. + // partially used later. Shrink the subregister range appropriately. if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { - DEBUG(dbgs() << "\t\tDead uses at sublane " - << format("%04X", S.LaneMask) << " at " << Def << "\n"); + DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); ShrinkMask |= S.LaneMask; } } @@ -2335,6 +2412,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) LI.removeEmptySubRanges(); } +void JoinVals::removeImplicitDefs() { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) + continue; + + VNInfo *VNI = LR.getValNumInfo(i); + VNI->markUnused(); + LR.removeValNo(VNI); + } +} + void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, SmallVectorImpl &ShrinkRegs) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { @@ -2379,7 +2468,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, } void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, + LaneBitmask LaneMask, const CoalescerPair &CP) { SmallVector NewVNInfo; JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, @@ -2388,12 +2477,21 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, NewVNInfo, CP, LIS, TRI, true, true); // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) - // Conflicts should already be resolved so the mapping/resolution should - // always succeed. - if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); - if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); + // We should be able to resolve all conflicts here as we could successfully do + // it on the mainrange already. There is however a problem when multiple + // ranges get mapped to the "overflow" lane mask bit which creates unexpected + // interferences. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); + } + if (!LHSVals.resolveConflicts(RHSVals) || + !RHSVals.resolveConflicts(LHSVals)) { + // We already determined that it is legal to merge the intervals, so this + // should never fail. 
+ llvm_unreachable("*** Couldn't join subrange!\n"); + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a @@ -2403,6 +2501,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LHSVals.pruneValues(RHSVals, EndPoints, false); RHSVals.pruneValues(LHSVals, EndPoints, false); + LHSVals.removeImplicitDefs(); + RHSVals.removeImplicitDefs(); + LRange.verify(); RRange.verify(); @@ -2423,24 +2524,26 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP) { + LaneBitmask LaneMask, + CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &R : LI.subranges()) { - unsigned RMask = R.LaneMask; + LaneBitmask RMask = R.LaneMask; // LaneMask of subregisters common to subrange R and ToMerge. - unsigned Common = RMask & LaneMask; + LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. if (Common == 0) continue; - DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " + << PrintLaneMask(Common) << '\n'); // LaneMask of subregisters contained in the R range but not in ToMerge, // they have to split into their own subrange. - unsigned LRest = RMask & ~LaneMask; + LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { R.LaneMask = LRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = LI.createSubRangeFrom(Allocator, Common, R); } else { @@ -2454,7 +2557,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, } if (LaneMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } } @@ -2463,7 +2566,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, @@ -2490,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // create initial subranges if necessary. unsigned DstIdx = CP.getDstIdx(); if (!LHS.hasSubRanges()) { - unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(DstIdx); + LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. assert(Mask != 0); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. 
for (LiveInterval::SubRange &R : LHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); R.LaneMask = Mask; } } @@ -2508,17 +2611,16 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); if (!RHS.hasSubRanges()) { - unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(SrcIdx); + LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); mergeSubRangeInto(LHS, RHS, Mask, CP); } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); mergeSubRangeInto(LHS, R, Mask, CP); } } - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); LHSVals.pruneSubRegValues(LHS, ShrinkMask); @@ -2539,7 +2641,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) - LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -2641,6 +2743,64 @@ copyCoalesceWorkList(MutableArrayRef CurrList) { return Progress; } +/// Check if DstReg is a terminal node. +/// I.e., it does not have any affinity other than \p Copy. +static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, + const MachineRegisterInfo *MRI) { + assert(Copy.isCopyLike()); + // Check if the destination of this copy as any other affinity. + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) + if (&MI != &Copy && MI.isCopyLike()) + return false; + return true; +} + +bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { + assert(Copy.isCopyLike()); + if (!UseTerminalRule) + return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); + // Check if the destination of this copy has any other affinity. + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !isTerminalReg(DstReg, Copy, MRI)) + return false; + + // DstReg is a terminal node. Check if it interferes with any other + // copy involving SrcReg. + const MachineBasicBlock *OrigBB = Copy.getParent(); + const LiveInterval &DstLI = LIS->getInterval(DstReg); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { + // Technically we should check if the weight of the new copy is + // interesting compared to the other one and update the weight + // of the copies accordingly. However, this would only work if + // we would gather all the copies first then coalesce, whereas + // right now we interleave both actions. + // For now, just consider the copies that are in the same block. 
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) + continue; + unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg); + if (OtherReg == SrcReg) + OtherReg = OtherSrcReg; + // Check if OtherReg is a non-terminal. + if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + isTerminalReg(OtherReg, MI, MRI)) + continue; + // Check that OtherReg interfere with DstReg. + if (LIS->getInterval(OtherReg).overlaps(DstLI)) { + DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); + return true; + } + } + return false; +} + void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -2649,6 +2809,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // yet, it might invalidate the iterator. const unsigned PrevSize = WorkList.size(); if (JoinGlobalCopies) { + SmallVector LocalTerminals; + SmallVector GlobalTerminals; // Coalesce copies bottom-up to coalesce local defs before local uses. They // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by @@ -2657,17 +2819,35 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { MII != E; ++MII) { if (!MII->isCopyLike()) continue; - if (isLocalCopy(&(*MII), LIS)) - LocalWorkList.push_back(&(*MII)); - else - WorkList.push_back(&(*MII)); + bool ApplyTerminalRule = applyTerminalRule(*MII); + if (isLocalCopy(&(*MII), LIS)) { + if (ApplyTerminalRule) + LocalTerminals.push_back(&(*MII)); + else + LocalWorkList.push_back(&(*MII)); + } else { + if (ApplyTerminalRule) + GlobalTerminals.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } } + // Append the copies evicted by the terminal rule at the end of the list. + LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); + WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); } else { + SmallVector Terminals; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike()) - WorkList.push_back(MII); + if (MII->isCopyLike()) { + if (applyTerminalRule(*MII)) + Terminals.push_back(&(*MII)); + else + WorkList.push_back(MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. 
   // This prevents the WorkList from getting too large since most copies are
@@ -2694,8 +2874,8 @@ void RegisterCoalescer::joinAllIntervals() {
   std::vector<MBBPriorityInfo> MBBs;
   MBBs.reserve(MF->size());
-  for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
-    MachineBasicBlock *MBB = I;
+  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
                                    JoinSplitEdges && isSplitEdge(MBB)));
   }
@@ -2730,15 +2910,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   MF = &fn;
   MRI = &fn.getRegInfo();
   TM = &fn.getTarget();
-  TRI = TM->getSubtargetImpl()->getRegisterInfo();
-  TII = TM->getSubtargetImpl()->getInstrInfo();
+  const TargetSubtargetInfo &STI = fn.getSubtarget();
+  TRI = STI.getRegisterInfo();
+  TII = STI.getInstrInfo();
   LIS = &getAnalysis<LiveIntervals>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   Loops = &getAnalysis<MachineLoopInfo>();
-
-  const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
   if (EnableGlobalCopies == cl::BOU_UNSET)
-    JoinGlobalCopies = ST.useMachineScheduler();
+    JoinGlobalCopies = STI.enableJoinGlobalCopies();
   else
     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
@@ -2770,25 +2949,28 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
     unsigned Reg = InflateRegs[i];
     if (MRI->reg_nodbg_empty(Reg))
       continue;
-    if (MRI->recomputeRegClass(Reg, *TM)) {
+    if (MRI->recomputeRegClass(Reg)) {
       DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
                    << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+      ++NumInflated;
+
       LiveInterval &LI = LIS->getInterval(Reg);
-      unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
-      if (MaxMask == 0) {
+      if (LI.hasSubRanges()) {
         // If the inflated register class does not support subregisters anymore
         // remove the subranges.
-        LI.clearSubRanges();
-      } else {
+        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+          LI.clearSubRanges();
+        } else {
 #ifndef NDEBUG
-        // If subranges are still supported, then the same subregs should still
-        // be supported.
-        for (LiveInterval::SubRange &S : LI.subranges()) {
-          assert ((S.LaneMask & ~MaxMask) == 0);
-        }
+          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+          // If subranges are still supported, then the same subregs
+          // should still be supported.
+          for (LiveInterval::SubRange &S : LI.subranges()) {
+            assert((S.LaneMask & ~MaxMask) == 0);
+          }
 #endif
+        }
       }
-      ++NumInflated;
     }
   }
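Illustrative aside (not part of the original patch): the applyTerminalRule additions above implement the "terminal rule" described in the new doxygen comment earlier in this diff — a copy whose destination has exactly one affinity is deferred when a better-connected register copied from the same source interferes with that destination. Below is a standalone toy model of that decision with hypothetical names and a caller-provided interference test; like the patch, it only checks for the existence of such an interfering, non-terminal sibling and does not compare copy weights.

#include <vector>

// Toy model of the terminal rule (hypothetical names, not LLVM's API).
// A Copy ties a destination register to a source register; an InterferenceFn
// reports whether two registers are live at the same time.
struct Copy { int Dst, Src; };
using InterferenceFn = bool (*)(int, int);

// A register is "terminal" if the given copy is its only affinity, i.e. it
// appears in no other copy-like instruction.
static bool isTerminal(int Reg, const Copy &Self, const std::vector<Copy> &Copies) {
  for (const Copy &C : Copies)
    if (&C != &Self && (C.Dst == Reg || C.Src == Reg))
      return false;
  return true;
}

// Returns true when coalescing Self should be deferred: Self.Dst is terminal,
// and some other, non-terminal register with an affinity to Self.Src
// interferes with Self.Dst.  The two destinations cannot both be coalesced
// with the source, and the non-terminal one exposes more opportunities.
static bool applyTerminalRule(const Copy &Self, const std::vector<Copy> &Copies,
                              InterferenceFn Interferes) {
  if (!isTerminal(Self.Dst, Self, Copies))
    return false;
  for (const Copy &C : Copies) {
    if (&C == &Self)
      continue;
    // Only consider other copies that also involve the source register.
    int Other;
    if (C.Src == Self.Src)
      Other = C.Dst;
    else if (C.Dst == Self.Src)
      Other = C.Src;
    else
      continue;
    if (isTerminal(Other, C, Copies))
      continue;
    if (Interferes(Other, Self.Dst))
      return true; // Defer Self; handle the better-connected copy first.
  }
  return false;
}

// Example: the copy vreg2 = COPY vreg1 is deferred because vreg3, which is
// also copied from vreg1 and has a further affinity (vreg4), interferes
// with vreg2.
static bool toyInterferes(int A, int B) {
  return (A == 3 && B == 2) || (A == 2 && B == 3);
}

int main() {
  std::vector<Copy> Copies = {{2, 1}, {3, 1}, {4, 3}};
  return applyTerminalRule(Copies[0], Copies, toyInterferes) ? 0 : 1;
}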