X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineCSE.cpp;h=896461fd194b52936db2b67755f419337cf91cfe;hb=bb7300224738e65f9a9945dc7df2f360b1d4feb6;hp=1e5110a95078417037e41e8a9b20b1f0ac03be0b;hpb=1df91b0e54bc62f8fc7a06a4f75220e40aa2dfe0;p=oota-llvm.git diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 1e5110a9507..896461fd194 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -63,6 +63,8 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); + AllocatableRegs.clear(); + ReservedRegs.clear(); } private: @@ -76,11 +78,13 @@ namespace { ScopedHTType VNT; SmallVector Exps; unsigned CurrVN; + BitVector AllocatableRegs; + BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const ; + MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, @@ -96,13 +100,13 @@ namespace { void ExitScope(MachineBasicBlock *MBB); bool ProcessBlock(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); + DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -110,8 +114,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } - bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { bool Changed = false; @@ -167,6 +169,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool SeenDef = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; if (!MO.isReg() || !MO.getReg()) continue; if (!TRI->regsOverlap(MO.getReg(), Reg)) @@ -211,11 +215,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - PhysRefs.insert(Reg); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - PhysRefs.insert(*Alias); } return !PhysRefs.empty(); @@ -237,9 +242,9 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (TRI->isInAllocatableClass(PhysDefs[i])) - // Avoid extending live range of physical registers unless - // they are unallocatable. + if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. return false; } CrossMBB = true; @@ -268,6 +273,10 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. + if (MO.isRegMask()) + return false; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -317,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -387,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector, 8> CSEPairs; + SmallVector ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -428,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. bool CrossMBBPhysDef = false; - SmallSet PhysRefs; + SmallSet PhysRefs; SmallVector PhysDefs; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; @@ -456,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; } @@ -479,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // within the register class of the new instruction. const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); DoCSE = false; break; } @@ -494,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -513,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumCommutes; Changed = true; } else { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; @@ -528,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { /// up the dominator tree to destroy ancestors which are now done. void MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap) { + DenseMap &OpenChildren) { if (OpenChildren[Node]) return; @@ -537,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, ExitScope(Node->getBlock()); // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { + while (MachineDomTreeNode *Parent = Node->getIDom()) { unsigned Left = --OpenChildren[Parent]; if (Left != 0) break; @@ -549,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { SmallVector Scopes; SmallVector WorkList; - DenseMap ParentMap; DenseMap OpenChildren; CurrVN = 0; @@ -564,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { OpenChildren[Node] = NumChildren; for (unsigned i = 0; i != NumChildren; ++i) { MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; WorkList.push_back(Child); } } while (!WorkList.empty()); @@ -577,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { EnterScope(MBB); Changed |= ProcessBlock(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. - ExitScopeIfDone(Node, OpenChildren, ParentMap); + ExitScopeIfDone(Node, OpenChildren); } return Changed; @@ -589,5 +635,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis(); DT = &getAnalysis(); + AllocatableRegs = TRI->getAllocatableSet(MF); + ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); }