X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FBranchFolding.cpp;h=f8cc3b3999e8a4df39b928c0a3f21f20876bdb2c;hb=0b8c9a80f20772c3793201ab5b251d3520b9cea3;hp=d344af06fd585f0fce69bad18cb3ef3f3ef14386;hpb=aa60ddac394e8d79ecd9700e32b595c115585007;p=oota-llvm.git diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index d344af06fd5..f8cc3b3999e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -18,31 +18,34 @@ #define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumHoist , "Number of times common instructions are hoisted"); + static cl::opt FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); + // Throttle for huge numbers of predecessors (compile speed problems) static cl::opt TailMergeThreshold("tail-merge-threshold", @@ -58,61 +61,59 @@ TailMergeSize("tail-merge-size", namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { + class BranchFolderPass : public MachineFunctionPass { public: static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + explicit BranchFolderPass(): MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolderPass(DefaultEnableTailMerge); -} +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - return OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), - getAnalysisIfAvailable()); + TargetPassConfig *PassConfig = &getAnalysis(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); } -BranchFolder::BranchFolder(bool defaultEnableTailMerge) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case cl::BOU_FALSE: EnableTailMerge = false; break; } + + EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the /// function, updating the CFG. void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(errs() << "\nRemoving MBB: " << *MBB); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); MachineFunction *MF = MBB->getParent(); // drop all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); - // If there are any labels in the basic block, unregister them from - // MachineModuleInfo. - if (MMI && !MBB->empty()) { - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (I->isLabel()) - // The label ID # is always operand #0, an immediate. - MMI->InvalidateLabel(I->getOperand(0).getImm()); - } - } + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); // Remove the block. MF->erase(MBB); @@ -131,13 +132,12 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { SmallSet ImpDefRegs; MachineBasicBlock::iterator I = MBB->begin(); while (I != MBB->end()) { - if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - ImpDefRegs.insert(SubReg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); ++I; } if (ImpDefRegs.empty()) @@ -177,11 +177,19 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MachineModuleInfo *mmi) { if (!tii) return false; + TriedMerging.clear(); + TII = tii; TRI = tri; MMI = mmi; + RS = NULL; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -193,70 +201,46 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= OptimizeImpDefsBlock(MBB); } - bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { - MadeChangeThisIteration = false; - MadeChangeThisIteration |= TailMergeBlocks(MF); - MadeChangeThisIteration |= OptimizeBranches(MF); + MadeChangeThisIteration = TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + if (EnableHoistCommonCode) + MadeChangeThisIteration |= HoistCommonCode(MF); MadeChange |= MadeChangeThisIteration; } - // Do tail duplication once after tail merging is done. Otherwise it is - // tough to avoid situations where tail duplication and tail merging undo - // each other's transformations ad infinitum. - MadeChange |= TailDuplicateBlocks(MF); - - // See if any jump tables have become mergable or dead as the code generator + // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); - const std::vector &JTs = JTI->getJumpTables(); - if (!JTs.empty()) { - // Figure out how these jump tables should be merged. - std::vector JTMapping; - JTMapping.reserve(JTs.size()); - - // We always keep the 0th jump table. - JTMapping.push_back(0); - - // Scan the jump tables, seeing if there are any duplicates. Note that this - // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) { - if (JTs[i].MBBs.empty()) - JTMapping.push_back(i); - else - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); - } - - // If a jump table was merge with another one, walk the function rewriting - // references to jump tables to reference the new JT ID's. Keep track of - // whether we see a jump table idx, if not, we can delete the JT. - BitVector JTIsLive(JTs.size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); - if (!Op.isJTI()) continue; - unsigned NewIdx = JTMapping[Op.getIndex()]; - Op.setIndex(NewIdx); - - // Remember that this JT is live. - JTIsLive.set(NewIdx); - } - } + if (JTI == 0) { + delete RS; + return MadeChange; + } - // Finally, remove dead jump tables. This happens either because the - // indirect jump was unreachable (and thus deleted) or because the jump - // table was merged with some other one. - for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) - if (!JTIsLive.test(i)) { - JTI->RemoveJumpTable(i); - MadeChange = true; + // Walk the function to find jump tables that are live. + BitVector JTIsLive(JTI->getJumpTables().size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJTI()) continue; + + // Remember that this JT is live. + JTIsLive.set(Op.getIndex()); } } + // Finally, remove dead jump tables. This happens when the + // indirect jump was unreachable (and thus deleted). + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + MadeChange = true; + } + delete RS; return MadeChange; } @@ -298,28 +282,21 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { return Hash; } -/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a -/// branch must be inserted). For blocks with a successor, one of the -/// two blocks to be tail-merged will end with a branch already, so -/// it gains to cross-jump even for one instruction. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, - unsigned minCommonTailLength) { +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. --I; - unsigned Hash = HashMachineInstr(I); - - if (I == MBB->begin() || minCommonTailLength == 1) - return Hash; // Single instr MBB. + // Skip debug info so it will not affect codegen. + while (I->isDebugValue()) { + if (I==MBB->begin()) + return 0; // MBB empty except for debug info. + --I; + } - --I; - // Hash in the second-to-last instruction. - Hash ^= HashMachineInstr(I) << 2; - return Hash; + return HashMachineInstr(I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -335,39 +312,93 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; + // Skip debugging pseudos; necessary to avoid changing the code. + while (I1->isDebugValue()) { + if (I1==MBB1->begin()) { + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) + // I1==DBG at begin; I2==DBG at begin + return TailLen; + --I2; + } + ++I2; + // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin + return TailLen; + } + --I1; + } + // I1==first (untested) non-DBG preceding known match + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) { + ++I1; + // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin + return TailLen; + } + --I2; + } + // I1, I2==first (untested) non-DBGs preceding known match if (!I1->isIdenticalTo(I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->getOpcode() == TargetInstrInfo::INLINEASM) { + I1->isInlineAsm()) { ++I1; ++I2; break; } ++TailLen; } + // Back past possible debugging pseudos at beginning of block. This matters + // when one block differs from the other only by whether debugging pseudos + // are present at the beginning. (This way, the various checks later for + // I1==MBB1->begin() work as expected.) + if (I1 == MBB1->begin() && I2 != MBB2->begin()) { + --I2; + while (I2->isDebugValue()) { + if (I2 == MBB2->begin()) + return TailLen; + --I2; + } + ++I2; + } + if (I2 == MBB2->begin() && I1 != MBB1->begin()) { + --I1; + while (I1->isDebugValue()) { + if (I1 == MBB1->begin()) + return TailLen; + --I1; + } + ++I1; + } return TailLen; } +void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB) { + if (RS) { + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(prior(CurMBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } +} + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. This -/// returns true if OldInst's block is modified, false if NewDest is modified. +/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *OldBB = OldInst->getParent(); + MachineBasicBlock *CurMBB = OldInst->getParent(); - // Remove all the old successors of OldBB from the CFG. - while (!OldBB->succ_empty()) - OldBB->removeSuccessor(OldBB->succ_begin()); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); - // Remove all the dead instructions from the end of OldBB. - OldBB->erase(OldInst, OldBB->end()); + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(CurMBB, NewDest); - // If OldBB isn't immediately before OldBB, insert a branch to it. - if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) - TII->InsertBranch(*OldBB, NewDest, 0, SmallVector()); - OldBB->addSuccessor(NewDest); ++NumTailMerge; } @@ -376,6 +407,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. @@ -393,16 +427,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); // For targets that use the register scavenger, we must maintain LiveIns. - if (RS) { - RS->enterBasicBlock(&CurMBB); - if (!CurMBB.empty()) - RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) - NewMBB->addLiveIn(i); - } + MaintainLiveIns(&CurMBB, NewMBB); return NewMBB; } @@ -413,10 +438,11 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall()) + if (I->isDebugValue()) + continue; + if (I->isCall()) Time += 10; - else if (TID.mayLoad() || TID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -431,41 +457,42 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector()); + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector(), dl); } bool BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { if (getHash() < o.getHash()) return true; - else if (getHash() > o.getHash()) + if (getHash() > o.getHash()) return false; - else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + if (getBlock()->getNumber() < o.getBlock()->getNumber()) return true; - else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + if (getBlock()->getNumber() > o.getBlock()->getNumber()) return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with itself. + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. #ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); +#else + return false; #endif - return false; - } } /// CountTerminators - Count the number of terminators in the given @@ -481,7 +508,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -500,10 +527,11 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); - MachineFunction *MF = MBB1->getParent(); - if (CommonTailLen == 0) return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. @@ -528,8 +556,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. @@ -540,8 +568,10 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // we don't have to split a block. At worst we will be introducing 1 new // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. + MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -616,9 +646,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. -unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength) { - unsigned commonTailIndex = 0; +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; unsigned TimeEstimate = ~0U; for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. @@ -640,10 +671,17 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + // If the common tail includes any debug info we will take it pretty + // randomly from one of the inputs. Might be better to remove it? + DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + SameTails[commonTailIndex].setBlock(newMBB); SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); @@ -651,7 +689,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, if (PredBB == MBB) PredBB = newMBB; - return commonTailIndex; + return true; } // See if any of the blocks in MergePotentials (which all have a common single @@ -670,18 +708,18 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // this many instructions in common. unsigned minCommonTailLength = TailMergeSize; - DEBUG(errs() << "\nTryTailMergeBlocks: "; + DEBUG(dbgs() << "\nTryTailMergeBlocks: "; for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - errs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() << (i == e-1 ? "" : ", "); - errs() << "\n"; + dbgs() << "\n"; if (SuccBB) { - errs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; if (PredBB) - errs() << " which has fall-through from BB#" + dbgs() << " which has fall-through from BB#" << PredBB->getNumber() << "\n"; } - errs() << "Looking for common tails of at least " + dbgs() << "Looking for common tails of at least " << minCommonTailLength << " instruction" << (minCommonTailLength == 1 ? "" : "s") << '\n'; ); @@ -746,25 +784,29 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber() + DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() << " for "); for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { if (commonTailIndex == i) continue; - DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber() + DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].getMPIter()); } - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. MadeChange = true; @@ -773,21 +815,27 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, } bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { - - if (!EnableTailMerge) return false; - bool MadeChange = false; + if (!EnableTailMerge) return MadeChange; // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + // See if we can do any tail merging on those. - if (MergePotentials.size() < TailMergeThreshold && - MergePotentials.size() >= 2) + if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). @@ -809,79 +857,99 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { - SmallPtrSet UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); - MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2; ++P) { - MachineBasicBlock *PBB = *P; - // Skip blocks that loop to themselves, can't tail merge these. - if (PBB == IBB) - continue; - // Visit each predecessor only once. - if (!UniquePreds.insert(PBB)) - continue; - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { - // Failing case: IBB is the target of a cbr, and - // we cannot reverse the branch. - SmallVector NewCond(Cond); - if (!Cond.empty() && TBB == IBB) { - if (TII->ReverseBranchCondition(NewCond)) + if (I->pred_size() < 2) continue; + SmallPtrSet UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { + MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; + + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and we cannot reverse the + // branch. + SmallVector NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice to have + // a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr continue; - // This is the QBB case described above - if (!FBB) - FBB = next(MachineFunction::iterator(PBB)); - } - // Failing case: the only way IBB can be reached from PBB is via - // exception handling. Happens for landing pads. Would be nice - // to have a bit in the edge so we didn't have to do all this. - if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; - if (IP != MF.end()) - PredNextBB = IP; - if (TBB == NULL) { - if (IBB != PredNextBB) // fallthrough - continue; - } else if (FBB) { - if (TBB != IBB && FBB != IBB) // cbr then ubr - continue; - } else if (Cond.empty()) { - if (TBB != IBB) // ubr - continue; - } else { - if (TBB != IBB && IBB != PredNextBB) // cbr - continue; - } - } - // Remove the unconditional branch at the end, if any. - if (TBB && (Cond.empty() || FBB)) { - TII->RemoveBranch(*PBB); - if (!Cond.empty()) - // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), - *P)); } + + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(IBB, PredBB); - // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks - if (MergePotentials.size() == 1 && - MergePotentials.begin()->getBlock() != PredBB) - FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + + // If this is a large problem, avoid visiting the same basic blocks multiple + // times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + + // Reinsert an unconditional branch if needed. The 1 below can occur as a + // result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + return MadeChange; } @@ -895,7 +963,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); @@ -909,70 +978,28 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { return MadeChange; } - -/// CanFallThrough - Return true if the specified block (with the specified -/// branch condition) can implicitly transfer control to the block after it by -/// falling off the end of it. This should return false if it can reach the -/// block after it, but it uses an explicit branch to do so (e.g. a table jump). -/// -/// True is a conservative answer. -/// -bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, - bool BranchUnAnalyzable, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) { - MachineFunction::iterator Fallthrough = CurBB; - ++Fallthrough; - // If FallthroughBlock is off the end of the function, it can't fall through. - if (Fallthrough == CurBB->getParent()->end()) - return false; - - // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. - if (!CurBB->isSuccessor(Fallthrough)) - return false; - - // If we couldn't analyze the branch, examine the last instruction. - // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be - // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - if (BranchUnAnalyzable) - return CurBB->empty() || !CurBB->back().getDesc().isBarrier() || - CurBB->back().getDesc().isPredicable(); - - // If there is no branch, control always falls through. - if (TBB == 0) return true; - - // If there is some explicit branch to the fallthrough block, it can obviously - // reach, even though the branch should get folded to fall through implicitly. - if (MachineFunction::iterator(TBB) == Fallthrough || - MachineFunction::iterator(FBB) == Fallthrough) +// Blocks should be considered empty if they contain only debug info; +// else the debug info would affect codegen. +static bool IsEmptyBlock(MachineBasicBlock *MBB) { + if (MBB->empty()) return true; - - // If it's an unconditional branch to some block not the fall through, it - // doesn't fall through. - if (Cond.empty()) return false; - - // Otherwise, if it is conditional and has no explicit false block, it falls - // through. - return FBB == 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI!=MBBE; ++MBBI) { + if (!MBBI->isDebugValue()) + return false; + } + return true; } -/// CanFallThrough - Return true if the specified can implicitly transfer -/// control to the block after it by falling off the end of it. This should -/// return false if it can reach the block after it, but it uses an explicit -/// branch to do so (e.g. a table jump). -/// -/// True is a conservative answer. -/// -bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true); - return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond); +// Blocks with only debug info and branches should be considered the same +// as blocks with only branches. +static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { + MachineBasicBlock::iterator MBBI, MBBE; + for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) { + if (!MBBI->isDebugValue()) + break; + } + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -985,152 +1012,38 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block // into a fallthrough to the return. - if (MBB1->empty() || MBB2->empty()) return false; + if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false; // If there is a clear successor ordering we make sure that one block // will fall through to the next if (MBB1->isSuccessor(MBB2)) return true; if (MBB2->isSuccessor(MBB1)) return false; - MachineInstr *MBB1I = --MBB1->end(); - MachineInstr *MBB2I = --MBB2->end(); - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + // Neither block consists entirely of debug info (per IsEmptyBlock check), + // so we needn't test for falling off the beginning here. + MachineBasicBlock::iterator MBB1I = --MBB1->end(); + while (MBB1I->isDebugValue()) + --MBB1I; + MachineBasicBlock::iterator MBB2I = --MBB2->end(); + while (MBB2I->isDebugValue()) + --MBB2I; + return MBB2I->isCall() && !MBB1I->isCall(); } -/// TailDuplicateBlocks - Look for small blocks that are unconditionally -/// branched to and do not fall through. Tail-duplicate their instructions -/// into their predecessors to eliminate (dynamic) branches. -bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { - bool MadeChange = false; - - // Make sure blocks are numbered in order - MF.RenumberBlocks(); - - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; - - // Only duplicate blocks that end with unconditional branches. - if (CanFallThrough(MBB)) - continue; - - MadeChange |= TailDuplicate(MBB, MF); - - // If it is dead, remove it. - if (MBB->pred_empty()) { - RemoveDeadBlock(MBB); - MadeChange = true; - ++NumDeadBlocks; - } - } - return MadeChange; -} - -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each -/// of its predecessors. -bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, - MachineFunction &MF) { - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) - return false; - - // Duplicate up to one less than the tail-merge threshold. When optimizing - // for size, duplicate only one, because one branch instruction can be - // eliminated to compensate for the duplication. - unsigned MaxDuplicateCount = - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ? - 1 : (TailMergeSize - 1); - - // Check the instructions in the block to determine whether tail-duplication - // is invalid or unlikely to be profitable. - unsigned i = 0; - bool HasCall = false; - for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I, ++i) { - // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; - // Don't duplicate more than the threshold. - if (i == MaxDuplicateCount) return false; - // Remember if we saw a call. - if (I->getDesc().isCall()) HasCall = true; - } - // Heuristically, don't tail-duplicate calls if it would expand code size, - // as it's less likely to be worth the extra cost. - if (i > 1 && HasCall) - return false; - - // Iterate through all the unique predecessors and tail-duplicate this - // block into them, if possible. Copying the list ahead of time also - // avoids trouble with the predecessor list reallocating. - bool Changed = false; - SmallSetVector Preds(TailBB->pred_begin(), - TailBB->pred_end()); - for (SmallSetVector::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - - assert(TailBB != PredBB && - "Single-block loop should have been rejected earlier!"); - if (PredBB->succ_size() > 1) continue; - - MachineBasicBlock *PredTBB, *PredFBB; - SmallVector PredCond; - if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - if (!PredCond.empty()) - continue; - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() != 1) - continue; - // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB)) - continue; - - DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From Succ: " << *TailBB); - - // Remove PredBB's unconditional branch. - TII->RemoveBranch(*PredBB); - // Clone the contents of TailBB into PredBB. - for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(I); - PredBB->insert(PredBB->end(), NewMI); - } - - // Update the CFG. - PredBB->removeSuccessor(PredBB->succ_begin()); - assert(PredBB->succ_empty() && - "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); - - Changed = true; - } - - // If TailBB was duplicated into all its predecessors except for the prior - // block, which falls through unconditionally, move the contents of this - // block into the prior block. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; - SmallVector PriorCond; - bool PriorUnAnalyzable = - TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); - // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. - if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << PrevBB - << "From MBB: " << *TailBB); - PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; - assert(PrevBB.succ_empty()); - PrevBB.transferSuccessors(TailBB); - Changed = true; - } - - return Changed; +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless its absent, in which case use the +/// DebugLoc of the second if present. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified @@ -1147,7 +1060,7 @@ ReoptimizeBlock: // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be // optimized away. - if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; @@ -1162,7 +1075,8 @@ ReoptimizeBlock: } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. - MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } return MadeChange; @@ -1185,10 +1099,11 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1203,11 +1118,27 @@ ReoptimizeBlock: // AnalyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << PrevBB + !MBB->hasAddressTaken() && !MBB->isLandingPad()) { + DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); + // Remove redundant DBG_VALUEs first. + if (PrevBB.begin() != PrevBB.end()) { + MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); + --PrevBBIter; + MachineBasicBlock::iterator MBBIter = MBB->begin(); + // Check if DBG_VALUE at the end of PrevBB is identical to the + // DBG_VALUE at the beginning of MBB. + while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() + && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { + if (!MBBIter->isIdenticalTo(PrevBBIter)) + break; + MachineInstr *DuplicateDbg = MBBIter; + ++MBBIter; -- PrevBBIter; + DuplicateDbg->eraseFromParent(); + } + } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; + PrevBB.removeSuccessor(PrevBB.succ_begin()); assert(PrevBB.succ_empty()); PrevBB.transferSuccessors(MBB); MadeChange = true; @@ -1226,8 +1157,9 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1239,8 +1171,9 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1257,7 +1190,7 @@ ReoptimizeBlock: // the assert condition out of the loop body. if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && MachineFunction::iterator(PriorTBB) == FallThrough && - !CanFallThrough(MBB)) { + !MBB->canFallThrough()) { bool DoTransform = true; // We have to be careful that the succs of PredBB aren't both no-successor @@ -1269,31 +1202,16 @@ ReoptimizeBlock: !IsBetterFallthrough(PriorTBB, MBB)) DoTransform = false; - // We don't want to do this transformation if we have control flow like: - // br cond BB2 - // BB1: - // .. - // jmp BBX - // BB2: - // .. - // ret - // - // In this case, we could actually be moving the return block *into* a - // loop! - if (DoTransform && !MBB->succ_empty() && - (!CanFallThrough(PriorTBB) || PriorTBB->empty())) - DoTransform = false; - - if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { - DEBUG(errs() << "\nMoving MBB: " << *MBB + DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1321,8 +1239,9 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1332,20 +1251,37 @@ ReoptimizeBlock: // If this branch is the only thing in its block, see if we can forward // other blocks across it. if (CurTBB && CurCond.empty() && CurFBB == 0 && - MBB->begin()->getDesc().isBranch() && CurTBB != MBB && + IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. TII->RemoveBranch(*MBB); - + // If the only things remaining in the block are debug info, remove these + // as well, so this will behave the same as an empty block in non-debug + // mode. + if (!MBB->empty()) { + bool NonDebugInfoFound = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (!I->isDebugValue()) { + NonDebugInfoFound = true; + break; + } + } + if (!NonDebugInfoFound) + // Make the block empty, losing the debug info (we could probably + // improve this in some cases.) + MBB->erase(MBB->begin(), MBB->end()); + } // If this block is just an unconditional branch to CurTBB, we can // usually completely eliminate the block. The only case we cannot // completely eliminate the block is when the block before this one // falls through into MBB and we can't understand the prior block's branch // condition. if (MBB->empty()) { - bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB); + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); if (PredHasNoFallThrough || !PriorUnAnalyzable || !PrevBB.isSuccessor(MBB)) { // If the prior block falls through into us, turn it into an @@ -1360,8 +1296,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1385,9 +1322,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1396,7 +1334,8 @@ ReoptimizeBlock: } // Change any jumptables to go to the new MBB. - MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); if (DidChange) { ++NumBranchOpts; MadeChange = true; @@ -1406,20 +1345,18 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, 0, CurCond); + TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); } } // If the prior block doesn't fall through into this block, and if this // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. - if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, - PriorTBB, PriorFBB, PriorCond)) { + if (!PrevBB.canFallThrough()) { // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. - bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, - CurCond); + bool CurFallsThru = MBB->canFallThrough(); if (!MBB->isLandingPad()) { // Check all the predecessors of this block. If one of them has no fall @@ -1429,9 +1366,9 @@ ReoptimizeBlock: // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = 0, *PredFBB = 0; SmallVector PredCond; - if (PredBB != MBB && !CanFallThrough(PredBB) && + if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { @@ -1446,9 +1383,9 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1470,7 +1407,7 @@ ReoptimizeBlock: // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen. if (SuccBB != MBB && &*SuccPrev != MBB && - !CanFallThrough(SuccPrev) && !CurUnAnalyzable && + !SuccPrev->canFallThrough() && !CurUnAnalyzable && !SuccBB->isLandingPad()) { MBB->moveBefore(SuccBB); MadeChange = true; @@ -1481,7 +1418,7 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. - MachineBasicBlock *PrevTBB, *PrevFBB; + MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; SmallVector PrevCond; if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && @@ -1495,3 +1432,294 @@ ReoptimizeBlock: return MadeChange; } + +//===----------------------------------------------------------------------===// +// Hoist Common Code +//===----------------------------------------------------------------------===// + +/// HoistCommonCode - Hoist common instruction sequences at the start of basic +/// blocks to their common predecessor. +bool BranchFolder::HoistCommonCode(MachineFunction &MF) { + bool MadeChange = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + MadeChange |= HoistCommonCodeInSuccs(MBB); + } + + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// findHoistingInsertPosAndDeps - Find the location to move common instructions +/// in successors to. The location is usually just before the terminator, +/// however if the terminator is a conditional branch and its previous +/// instruction is the flag setting instruction, the previous instruction is +/// the preferred location. This function also gathers uses and defs of the +/// instructions from the insertion point to the end of the block. The data is +/// used by HoistCommonCodeInSuccs to ensure safety. +static +MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + SmallSet &Uses, + SmallSet &Defs) { + MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); + if (!TII->isUnpredicatedTerminator(Loc)) + return MBB->end(); + + for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = Loc->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); + } else if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + } + + if (Uses.empty()) + return Loc; + if (Loc == MBB->begin()) + return MBB->end(); + + // The terminator is probably a conditional branch, try not to separate the + // branch from condition setting instruction. + MachineBasicBlock::iterator PI = Loc; + --PI; + while (PI != MBB->begin() && Loc->isDebugValue()) + --PI; + + bool IsDef = false; + for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { + const MachineOperand &MO = PI->getOperand(i); + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; + if (!MO.isReg() || MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (Uses.count(Reg)) + IsDef = true; + } + if (!IsDef) + // The condition setting instruction is not just before the conditional + // branch. + return Loc; + + // Be conservative, don't insert instruction above something that may have + // side-effects. And since it's potentially bad to separate flag setting + // instruction from the conditional branch, just abort the optimization + // completely. + // Also avoid moving code above predicated instruction since it's hard to + // reason about register liveness with predicated instruction. + bool DontMoveAcrossStore = true; + if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) || + TII->isPredicated(PI)) + return MBB->end(); + + + // Find out what registers are live. Note this routine is ignoring other live + // registers which are only used by instructions in successor blocks. + for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = PI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); + } else { + if (Uses.erase(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); + } + } + + return PI; +} + +/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction +/// sequence at the start of the function, move the instructions before MBB +/// terminator if it's legal. +bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) + return false; + + if (!FBB) FBB = findFalseBlock(MBB, TBB); + if (!FBB) + // Malformed bcc? True and false blocks are the same? + return false; + + // Restrict the optimization to cases where MBB is the only predecessor, + // it is an obvious win. + if (TBB->pred_size() > 1 || FBB->pred_size() > 1) + return false; + + // Find a suitable position to hoist the common instructions to. Also figure + // out which registers are used or defined by instructions from the insertion + // point to the end of the block. + SmallSet Uses, Defs; + MachineBasicBlock::iterator Loc = + findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs); + if (Loc == MBB->end()) + return false; + + bool HasDups = false; + SmallVector LocalDefs; + SmallSet LocalDefsSet; + MachineBasicBlock::iterator TIB = TBB->begin(); + MachineBasicBlock::iterator FIB = FBB->begin(); + MachineBasicBlock::iterator TIE = TBB->end(); + MachineBasicBlock::iterator FIE = FBB->end(); + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead)) + break; + + if (TII->isPredicated(TIB)) + // Hard to reason about register liveness with predicated instruction. + break; + + bool IsSafe = true; + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + if (Uses.count(Reg)) { + // Avoid clobbering a register that's used by the instruction at + // the point of insertion. + IsSafe = false; + break; + } + + if (Defs.count(Reg) && !MO.isDead()) { + // Don't hoist the instruction if the def would be clobber by the + // instruction at the point insertion. FIXME: This is overly + // conservative. It should be possible to hoist the instructions + // in BB2 in the following example: + // BB1: + // r1, eflag = op1 r2, r3 + // brcc eflag + // + // BB2: + // r1 = op2, ... + // = op3, r1 + IsSafe = false; + break; + } + } else if (!LocalDefsSet.count(Reg)) { + if (Defs.count(Reg)) { + // Use is defined by the instruction at the point of insertion. + IsSafe = false; + break; + } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. + MO.setIsKill(false); + } + } + if (!IsSafe) + break; + + bool DontMoveAcrossStore = true; + if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) + break; + + // Remove kills from LocalDefsSet, these registers had short live ranges. + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || !LocalDefsSet.count(Reg)) + continue; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } + + // Track local defs so we can update liveins. + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LocalDefs.push_back(Reg); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.insert(*AI); + } + + HasDups = true; + ++TIB; + ++FIB; + } + + if (!HasDups) + return false; + + MBB->splice(Loc, TBB, TBB->begin(), TIB); + FBB->erase(FBB->begin(), FIB); + + // Update livein's. + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + + ++NumHoist; + return true; +}