X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FCodeGen%2FRegisterCoalescer.cpp;h=292e836c495a9e656f49f2cf10c49a65a6cc2504;hb=39d772ac647c6a5a596a0de802f5a3aa025130e1;hp=9e3cf415e44f6aaa3b852d6d0246775da1db9673;hpb=45f376c4e6bb65d34f45eb8127759eb39b708b56;p=oota-llvm.git

diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 9e3cf415e44..292e836c495 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -60,7 +60,7 @@ EnableJoining("join-liveintervals",
 static cl::opt<bool>
 UseTerminalRule("terminal-rule",
                 cl::desc("Apply the terminal rule"),
-                cl::init(false));
+                cl::init(false), cl::Hidden);
 
 /// Temporary flag to test critical edge unsplitting.
 static cl::opt<bool>
@@ -194,7 +194,7 @@ namespace {
 
     /// If the source of a copy is defined by a
     /// trivial computation, replace the copy by rematerialize the definition.
-    bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
+    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
                                  bool &IsDefCopy);
 
     /// Return true if a copy involving a physreg should be joined.
@@ -224,6 +224,32 @@
     /// Dst, we can drop \p Copy.
     bool applyTerminalRule(const MachineInstr &Copy) const;
 
+    /// Check whether or not \p LI is composed by multiple connected
+    /// components and if that is the case, fix that.
+    void splitNewRanges(LiveInterval *LI) {
+      ConnectedVNInfoEqClasses ConEQ(*LIS);
+      unsigned NumComps = ConEQ.Classify(LI);
+      if (NumComps <= 1)
+        return;
+      SmallVector<LiveInterval*, 8> NewComps(1, LI);
+      for (unsigned i = 1; i != NumComps; ++i) {
+        unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
+        NewComps.push_back(&LIS->createEmptyInterval(VReg));
+      }
+
+      ConEQ.Distribute(&NewComps[0], *MRI);
+    }
+
+    /// Wrapper method for \see LiveIntervals::shrinkToUses.
+    /// This method does the proper fixing of the live-ranges when the afore
+    /// mentioned method returns true.
+    void shrinkToUses(LiveInterval *LI,
+                      SmallVectorImpl<MachineInstr *> *Dead = nullptr) {
+      if (LIS->shrinkToUses(LI, Dead))
+        // We may have created multiple connected components, split them.
+        splitNewRanges(LI);
+    }
+
   public:
     static char ID; ///< Class identification, replacement for typeinfo
     RegisterCoalescer() : MachineFunctionPass(ID) {
@@ -556,7 +582,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   // will also add the isKill marker.
   CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
   if (AS->end == CopyIdx)
-    LIS->shrinkToUses(&IntA);
+    shrinkToUses(&IntA);
 
   ++numExtends;
   return true;
@@ -851,7 +877,7 @@ static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
   return false;
 }
 
-bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
+bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
                                                 MachineInstr *CopyMI,
                                                 bool &IsDefCopy) {
   IsDefCopy = false;
@@ -882,7 +908,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
   if (!definesFullReg(*DefMI, SrcReg))
     return false;
   bool SawStore = false;
-  if (!DefMI->isSafeToMove(TII, AA, SawStore))
+  if (!DefMI->isSafeToMove(AA, SawStore))
     return false;
   const MCInstrDesc &MCID = DefMI->getDesc();
   if (MCID.getNumDefs() != 1)
@@ -929,6 +955,28 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
   TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI);
   MachineInstr *NewMI = std::prev(MII);
 
+  // In a situation like the following:
+  //   %vreg0:subreg = instr              ; DefMI, subreg = DstIdx
+  //   %vreg1        = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
+  // instead of widening %vreg1 to the register class of %vreg0 simply do:
+  //   %vreg1 = instr
+  const TargetRegisterClass *NewRC = CP.getNewRC();
+  if (DstIdx != 0) {
+    MachineOperand &DefMO = NewMI->getOperand(0);
+    if (DefMO.getSubReg() == DstIdx) {
+      assert(SrcIdx == 0 && CP.isFlipped()
+             && "Shouldn't have SrcIdx+DstIdx at this point");
+      const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+      const TargetRegisterClass *CommonRC =
+        TRI->getCommonSubClass(DefRC, DstRC);
+      if (CommonRC != nullptr) {
+        NewRC = CommonRC;
+        DstIdx = 0;
+        DefMO.setSubReg(0);
+      }
+    }
+  }
+
   LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
   CopyMI->eraseFromParent();
   ErasedInstrs.insert(CopyMI);
@@ -940,15 +988,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
   for (unsigned i = NewMI->getDesc().getNumOperands(),
          e = NewMI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = NewMI->getOperand(i);
-    if (MO.isReg()) {
-      assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+    if (MO.isReg() && MO.isDef()) {
+      assert(MO.isImplicit() && MO.isDead() &&
              TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
       NewMIImplDefs.push_back(MO.getReg());
     }
   }
 
   if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
-    const TargetRegisterClass *NewRC = CP.getNewRC();
     unsigned NewIdx = NewMI->getOperand(0).getSubReg();
 
     if (DefRC != nullptr) {
@@ -1024,7 +1071,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
   ++NumReMats;
 
   // The source interval can become smaller because we removed a use.
-  LIS->shrinkToUses(&SrcInt, &DeadDefs);
+  shrinkToUses(&SrcInt, &DeadDefs);
   if (!DeadDefs.empty()) {
     // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
     // to describe DstReg instead.
@@ -1402,10 +1449,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
                    << format("%04X", S.LaneMask) << ")\n");
       LIS->shrinkToUses(S, LI.reg);
     }
+    LI.removeEmptySubRanges();
   }
   if (ShrinkMainRange) {
     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
-    LIS->shrinkToUses(&LI);
+    shrinkToUses(&LI);
   }
 
   // SrcReg is guaranteed to be the register whose live interval that is
@@ -1483,6 +1531,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
       DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
       return false;
     }
+
+    // We must also check for clobbers caused by regmasks.
+    for (const auto &MO : MI->operands()) {
+      if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
+        DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+        return false;
+      }
+    }
   }
 
   // We're going to remove the copy which defines a physical reserved
@@ -1766,7 +1822,7 @@ public:
 
   /// Removes subranges starting at copies that get removed. This sometimes
   /// happens when undefined subranges are copied around. These ranges contain
-  /// no usefull information and can be removed.
+  /// no useful information and can be removed.
   void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
 
   /// Erase any machine instructions that have been coalesced away.
@@ -1787,12 +1843,12 @@ public:
 unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const {
   unsigned L = 0;
-  for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
-    if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef())
+  for (const MachineOperand &MO : DefMI->operands()) {
+    if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
       continue;
     L |= TRI->getSubRegIndexLaneMask(
-      TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
-    if (MO->readsReg())
+      TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
+    if (MO.readsReg())
       Redef = true;
   }
   return L;
 }
@@ -2177,13 +2233,13 @@ bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
                          unsigned Lanes) const {
   if (MI->isDebugValue())
     return false;
-  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
-    if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
+  for (const MachineOperand &MO : MI->operands()) {
+    if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
       continue;
-    if (!MO->readsReg())
+    if (!MO.readsReg())
       continue;
     if (Lanes & TRI->getSubRegIndexLaneMask(
-                  TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+                  TRI->composeSubRegIndices(SubIdx, MO.getSubReg())))
       return true;
   }
   return false;
@@ -2292,11 +2348,11 @@ void JoinVals::pruneValues(JoinVals &Other,
           // Remove <def,read-undef> flags. This def is now a partial redef.
           // Also remove <dead> flags since the joined live range will
          // continue past this instruction.
-          for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
-               MO.isValid(); ++MO) {
-            if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) {
-              MO->setIsUndef(EraseImpDef);
-              MO->setIsDead(false);
+          for (MachineOperand &MO :
+               Indexes->getInstructionFromIndex(Def)->operands()) {
+            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
+              MO.setIsUndef(EraseImpDef);
+              MO.setIsDead(false);
             }
           }
         }
@@ -2354,7 +2410,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
       continue;
     }
     // If a subrange ends at the copy, then a value was copied but only
-    // partially used later. Shrink the subregister range apropriately.
+    // partially used later. Shrink the subregister range appropriately.
    if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
       DEBUG(dbgs() << "\t\tDead uses at sublane "
                    << format("%04X", S.LaneMask) << " at " << Def << "\n");
@@ -2586,7 +2642,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
       // "overflow bit" 32. As a workaround we drop all subregister ranges
       // which means we loose some precision but are back to a well defined
       // state.
-      assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+      assert(TargetRegisterInfo::isImpreciseLaneMask(
+               CP.getNewRC()->getLaneMask())
             && "SubRange merge should only fail when merging into bit 32.");
       DEBUG(dbgs() << "\tSubrange join aborted!\n");
       LHS.clearSubRanges();
@@ -2613,7 +2670,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
   RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
   while (!ShrinkRegs.empty())
-    LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
 
   // Join RHS into LHS.
   LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
@@ -2731,15 +2788,19 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
   assert(Copy.isCopyLike());
   if (!UseTerminalRule)
     return false;
+  unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
   // Check if the destination of this copy has any other affinity.
-  unsigned DstReg = Copy.getOperand(0).getReg();
   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      // If SrcReg is a physical register, the copy won't be coalesced.
+      // Ignoring it may have other side effect (like missing
+      // rematerialization). So keep it.
+      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
       !isTerminalReg(DstReg, Copy, MRI))
     return false;
 
-  // DstReg is a terminal node. Check if it inteferes with any other
+  // DstReg is a terminal node. Check if it interferes with any other
   // copy involving SrcReg.
-  unsigned SrcReg = Copy.getOperand(1).getReg();
   const MachineBasicBlock *OrigBB = Copy.getParent();
   const LiveInterval &DstLI = LIS->getInterval(DstReg);
   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
@@ -2751,9 +2812,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
     // For now, just consider the copies that are in the same block.
     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
       continue;
-    unsigned OtherReg = MI.getOperand(0).getReg();
+    unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+                OtherSubReg);
     if (OtherReg == SrcReg)
-      OtherReg = MI.getOperand(1).getReg();
+      OtherReg = OtherSrcReg;
     // Check if OtherReg is a non-terminal.
     if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
         isTerminalReg(OtherReg, MI, MRI))
@@ -2775,25 +2838,45 @@
 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   // yet, it might invalidate the iterator.
   const unsigned PrevSize = WorkList.size();
   if (JoinGlobalCopies) {
+    SmallVector<MachineInstr*, 4> LocalTerminals;
+    SmallVector<MachineInstr*, 4> GlobalTerminals;
     // Coalesce copies bottom-up to coalesce local defs before local uses. They
     // are not inherently easier to resolve, but slightly preferable until we
     // have local live range splitting. In particular this is required by
     // cmp+jmp macro fusion.
     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
          MII != E; ++MII) {
-      if (!MII->isCopyLike() || applyTerminalRule(*MII))
+      if (!MII->isCopyLike())
         continue;
-      if (isLocalCopy(&(*MII), LIS))
-        LocalWorkList.push_back(&(*MII));
-      else
-        WorkList.push_back(&(*MII));
+      bool ApplyTerminalRule = applyTerminalRule(*MII);
+      if (isLocalCopy(&(*MII), LIS)) {
+        if (ApplyTerminalRule)
+          LocalTerminals.push_back(&(*MII));
+        else
+          LocalWorkList.push_back(&(*MII));
+      } else {
+        if (ApplyTerminalRule)
+          GlobalTerminals.push_back(&(*MII));
+        else
+          WorkList.push_back(&(*MII));
+      }
     }
+    // Append the copies evicted by the terminal rule at the end of the list.
+    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
   }
   else {
+    SmallVector<MachineInstr*, 4> Terminals;
     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
          MII != E; ++MII)
-      if (MII->isCopyLike() && !applyTerminalRule(*MII))
-        WorkList.push_back(MII);
+      if (MII->isCopyLike()) {
+        if (applyTerminalRule(*MII))
+          Terminals.push_back(&(*MII));
+        else
+          WorkList.push_back(MII);
+      }
+    // Append the copies evicted by the terminal rule at the end of the list.
+    WorkList.append(Terminals.begin(), Terminals.end());
   }
   // Try coalescing the collected copies immediately, and remove the nulls.
   // This prevents the WorkList from getting too large since most copies are
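
Background sketch (not part of the patch): the new shrinkToUses() wrapper exists because LiveIntervals::shrinkToUses() can leave a live interval whose value numbers are no longer all connected; each connected component must then become its own virtual register, which is what splitNewRanges() does through ConnectedVNInfoEqClasses::Classify()/Distribute(). The standalone program below only models that classify/distribute step with a toy union-find; EqClasses and every other name in it are invented for illustration and are not LLVM APIs.

  // Toy model of splitting one register's values into connected components.
  #include <cstdio>
  #include <numeric>
  #include <utility>
  #include <vector>

  // Minimal union-find, standing in for the equivalence-class computation.
  struct EqClasses {
    std::vector<unsigned> Parent;
    explicit EqClasses(unsigned N) : Parent(N) {
      std::iota(Parent.begin(), Parent.end(), 0u);
    }
    unsigned find(unsigned X) {
      while (Parent[X] != X)
        X = Parent[X] = Parent[Parent[X]];   // path halving
      return X;
    }
    void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
  };

  int main() {
    // Three value numbers of one hypothetical virtual register. After
    // shrinking, only VN0 and VN1 stay connected by a live segment; the
    // segment that reached VN2 was removed.
    const unsigned NumVals = 3;
    std::vector<std::pair<unsigned, unsigned>> LiveConnections = {{0, 1}};

    EqClasses EQ(NumVals);
    for (const auto &C : LiveConnections)
      EQ.join(C.first, C.second);

    // "Classify": number the components; "Distribute": give each value a
    // register, one register per component.
    std::vector<int> CompOfLeader(NumVals, -1);
    int NumComps = 0;
    for (unsigned V = 0; V != NumVals; ++V) {
      unsigned L = EQ.find(V);
      if (CompOfLeader[L] < 0)
        CompOfLeader[L] = NumComps++;
      std::printf("value %u -> register #%d\n", V, CompOfLeader[L]);
    }
    std::printf("%d separate registers needed\n", NumComps);
    return 0;
  }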
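Background sketch (not part of the patch): the rematerialization hunk avoids widening the copy's destination when DefMI only defines a subregister, by switching to TRI->getCommonSubClass(DefRC, DstRC) when such a class exists. The toy program below models register classes as plain sets of register names and the common subclass as their intersection; that is a deliberate simplification of how TargetRegisterInfo really derives subclasses, and every name in it is illustrative.

  #include <algorithm>
  #include <cstdio>
  #include <iterator>
  #include <set>
  #include <string>

  using RegClass = std::set<std::string>;

  // Hypothetical stand-in for getCommonSubClass(): registers usable as both.
  RegClass getCommonSubClass(const RegClass &A, const RegClass &B) {
    RegClass C;
    std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                          std::inserter(C, C.begin()));
    return C;
  }

  int main() {
    RegClass DefRC = {"q0", "q1", "q2", "q3"}; // wide class of DefMI's def
    RegClass DstRC = {"q0", "q1"};             // class of the copy destination
    RegClass Common = getCommonSubClass(DefRC, DstRC);
    if (!Common.empty())
      std::printf("rematerialize into the narrow class (%zu regs), no widening\n",
                  Common.size());
    return 0;
  }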
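Background sketch (not part of the patch): the new loop in joinReservedPhysReg() rejects the join when a regmask operand clobbers DstReg. An LLVM regmask is an array of uint32_t with one bit per physical register, where a set bit means the register is preserved across the instruction, so "clobbers" is the inverted bit test. The helper below is a hypothetical standalone stand-in for MachineOperand::clobbersPhysReg(), written only to show the bit arithmetic.

  #include <cstdint>
  #include <cstdio>

  // A clear bit in the mask means the corresponding register is clobbered.
  bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
    return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
  }

  int main() {
    // Toy mask covering 64 "registers": only registers 3 and 40 are preserved.
    uint32_t Mask[2] = {0, 0};
    Mask[3 / 32] |= 1u << (3 % 32);
    Mask[40 / 32] |= 1u << (40 % 32);

    const unsigned Regs[] = {3, 7, 40};
    for (unsigned Reg : Regs)
      std::printf("reg %u: %s\n", Reg,
                  clobbersPhysReg(Mask, Reg) ? "clobbered" : "preserved");
    return 0;
  }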
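Background sketch (not part of the patch): copyCoalesceInMBB() now keeps copies hit by the terminal rule instead of skipping them, collecting them in separate vectors and appending them after the ordinary work list so they are still coalesced, just last. The standalone program below mirrors only that ordering idea with a toy Copy type; it is not the pass's actual data structure.

  #include <cstdio>
  #include <string>
  #include <vector>

  struct Copy {
    std::string Name;
    bool Terminal; // would applyTerminalRule() fire for this copy?
  };

  int main() {
    std::vector<Copy> Copies = {
        {"copyA", false}, {"copyB", true}, {"copyC", false}};

    std::vector<Copy> WorkList, Terminals;
    for (const Copy &C : Copies)
      (C.Terminal ? Terminals : WorkList).push_back(C);

    // Append the deferred copies at the end, as the patch does with
    // LocalTerminals/GlobalTerminals, so they get the lowest priority.
    WorkList.insert(WorkList.end(), Terminals.begin(), Terminals.end());

    for (const Copy &C : WorkList)
      std::printf("coalesce %s\n", C.Name.c_str());
    return 0;
  }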