X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FBranchFolding.cpp;h=dc67593c339aa93709ce1e5115d5975e29388433;hb=afe6c2b001a924cd74bd0aacfed5984d9af004b0;hp=ffde173e6950fb416778d7e4153b355b6d2fa2ac;hpb=fe7e397100edd4f2d618a1ff938dfa8624670ec1;p=oota-llvm.git

diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index ffde173e695..dc67593c339 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -2,8 +2,8 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,7 +24,7 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/Statistic.h"
@@ -38,9 +38,15 @@ STATISTIC(NumTailMerge , "Number of block tails merged");
 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
 namespace {
+  // Throttle for huge numbers of predecessors (compile speed problems)
+  cl::opt<unsigned>
+  TailMergeThreshold("tail-merge-threshold",
+            cl::desc("Max number of predecessors to consider tail merging"),
+            cl::init(100), cl::Hidden);
+
   struct BranchFolder : public MachineFunctionPass {
     static char ID;
-    BranchFolder(bool defaultEnableTailMerge) :
+    explicit BranchFolder(bool defaultEnableTailMerge) :
         MachineFunctionPass((intptr_t)&ID) {
       switch (FlagEnableTailMerge) {
       case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
@@ -66,7 +72,7 @@ namespace {
                                   MachineBasicBlock::iterator BBI1);
     std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;

-    const MRegisterInfo *RegInfo;
+    const TargetRegisterInfo *RegInfo;
     RegScavenger *RS;
     // Branch optzn.
     bool OptimizeBranches(MachineFunction &MF);
@@ -81,12 +87,6 @@ namespace {
   char BranchFolder::ID = 0;
 }

-static bool CorrectExtraCFGEdges(MachineBasicBlock &MBB,
-                                 MachineBasicBlock *DestA,
-                                 MachineBasicBlock *DestB,
-                                 bool isCond,
-                                 MachineFunction::iterator FallThru);
-
 FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
   return new BranchFolder(DefaultEnableTailMerge);
 }
@@ -127,8 +127,7 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
     std::vector<MachineOperand> Cond;
     if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
-      EverMadeChange |= CorrectExtraCFGEdges(*MBB, TBB, FBB,
-                                             !Cond.empty(), next(I));
+      EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
   }

   RegInfo = MF.getTarget().getRegisterInfo();
@@ -173,8 +172,8 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
       for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
         MachineOperand &Op = I->getOperand(op);
         if (!Op.isJumpTableIndex()) continue;
-        unsigned NewIdx = JTMapping[Op.getJumpTableIndex()];
-        Op.setJumpTableIndex(NewIdx);
+        unsigned NewIdx = JTMapping[Op.getIndex()];
+        Op.setIndex(NewIdx);

         // Remember that this JT is live.
         JTIsLive[NewIdx] = true;
@@ -211,14 +210,12 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
     case MachineOperand::MO_Register:  OperandHash = Op.getReg(); break;
     case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
     case MachineOperand::MO_MachineBasicBlock:
-      OperandHash = Op.getMachineBasicBlock()->getNumber();
+      OperandHash = Op.getMBB()->getNumber();
       break;
-    case MachineOperand::MO_FrameIndex: OperandHash = Op.getFrameIndex(); break;
+    case MachineOperand::MO_FrameIndex:
     case MachineOperand::MO_ConstantPoolIndex:
-      OperandHash = Op.getConstantPoolIndex();
-      break;
     case MachineOperand::MO_JumpTableIndex:
-      OperandHash = Op.getJumpTableIndex();
+      OperandHash = Op.getIndex();
       break;
     case MachineOperand::MO_GlobalAddress:
     case MachineOperand::MO_ExternalSymbol:
@@ -272,7 +269,13 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   unsigned TailLen = 0;
   while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
     --I1; --I2;
-    if (!I1->isIdenticalTo(I2)) {
+    if (!I1->isIdenticalTo(I2) ||
+        // FIXME: This check is dubious. It's used to get around a problem where
+        // people incorrectly expect inline asm directives to remain in the same
+        // relative order. This is untenable because normal compiler
+        // optimizations (like this one) may reorder and/or merge these
+        // directives.
+        I1->getOpcode() == TargetInstrInfo::INLINEASM) {
       ++I1; ++I2;
       break;
     }
@@ -343,14 +346,13 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
 /// EstimateRuntime - Make a rough estimate for how long it will take to run
 /// the specified code.
 static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
-                                MachineBasicBlock::iterator E,
-                                const TargetInstrInfo *TII) {
+                                MachineBasicBlock::iterator E) {
   unsigned Time = 0;
   for (; I != E; ++I) {
-    const TargetInstrDescriptor &TID = TII->get(I->getOpcode());
-    if (TID.Flags & M_CALL_FLAG)
+    const TargetInstrDesc &TID = I->getDesc();
+    if (TID.isCall())
       Time += 10;
-    else if (TID.Flags & (M_LOAD_FLAG|M_STORE_FLAG))
+    else if (TID.isSimpleLoad() || TID.mayStore())
       Time += 2;
     else
       ++Time;
@@ -365,7 +367,6 @@ static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1,
                                   MachineBasicBlock::iterator MBB1I,
                                   MachineBasicBlock *MBB2,
                                   MachineBasicBlock::iterator MBB2I,
-                                  const TargetInstrInfo *TII,
                                   MachineBasicBlock *PredBB) {
   // If one block is the entry block, split the other one; we can't generate
   // a branch to the entry block, as its label is not emitted.
@@ -386,8 +387,8 @@ static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1,
   // TODO: if we had some notion of which block was hotter, we could split
   // the hot block, so it is the fall-through. Since we don't have profile info
   // make a decision based on which will hurt most to split.
-  unsigned MBB1Time = EstimateRuntime(MBB1->begin(), MBB1I, TII);
-  unsigned MBB2Time = EstimateRuntime(MBB2->begin(), MBB2I, TII);
+  unsigned MBB1Time = EstimateRuntime(MBB1->begin(), MBB1I);
+  unsigned MBB2Time = EstimateRuntime(MBB2->begin(), MBB2I);

   // If the MBB1 prefix takes "less time" to run than the MBB2 prefix, split the
   // MBB1 block so it falls through. This will penalize the MBB2 path, but will
@@ -430,8 +431,14 @@ static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
     return true;
   else if (p.second->getNumber() > q.second->getNumber())
     return false;
-  else
+  else {
+    // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+    // an object with itself.
+#ifndef _GLIBCXX_DEBUG
     assert(0 && "Predecessor appears twice");
+#endif
+    return(false);
+  }
 }

 // See if any of the blocks in MergePotentials (which all have a common single
@@ -535,7 +542,7 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
     }

     // Decide whether we want to split CurMBB or MBB2.
-    if (ShouldSplitFirstBlock(CurMBB, BBI1, MBB2, BBI2, TII, PredBB)) {
+    if (ShouldSplitFirstBlock(CurMBB, BBI1, MBB2, BBI2, PredBB)) {
       CurMBB = SplitMBBAt(*CurMBB, BBI1);
       BBI1 = CurMBB->begin();
       MergePotentials.back().second = CurMBB;
@@ -577,7 +584,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
       MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
   }
   // See if we can do any tail merging on those.
-  MadeChange |= TryMergeBlocks(NULL, NULL);
+  if (MergePotentials.size() < TailMergeThreshold)
+    MadeChange |= TryMergeBlocks(NULL, NULL);

   // Look at blocks (IBB) with multiple predecessors (PBB).
   // We change each predecessor to a canonical form, by
@@ -599,11 +607,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
   // transformations.)

   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
-    if (!I->succ_empty() && I->pred_size() >= 2) {
+    if (!I->succ_empty() && I->pred_size() >= 2 &&
+        I->pred_size() < TailMergeThreshold) {
       MachineBasicBlock *IBB = I;
       MachineBasicBlock *PredBB = prior(I);
       MergePotentials.clear();
-      for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end();
+      for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+                                            E2 = I->pred_end();
            P != E2; ++P) {
         MachineBasicBlock* PBB = *P;
         // Skip blocks that loop to themselves, can't tail merge these.
@@ -636,7 +646,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
         } else if (FBB) {
           if (TBB!=IBB && FBB!=IBB) // cbr then ubr
             continue;
-        } else if (Cond.size() == 0) {
+        } else if (Cond.empty()) {
           if (TBB!=IBB)   // ubr
             continue;
         } else {
@@ -693,68 +703,6 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
 }


-/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
-/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
-/// DestB, remove any other MBB successors from the CFG. DestA and DestB can
-/// be null.
-/// Besides DestA and DestB, retain other edges leading to LandingPads (currently
-/// there can be only one; we don't check or require that here).
-/// Note it is possible that DestA and/or DestB are LandingPads.
-static bool CorrectExtraCFGEdges(MachineBasicBlock &MBB,
-                                 MachineBasicBlock *DestA,
-                                 MachineBasicBlock *DestB,
-                                 bool isCond,
-                                 MachineFunction::iterator FallThru) {
-  bool MadeChange = false;
-  bool AddedFallThrough = false;
-
-  // If this block ends with a conditional branch that falls through to its
-  // successor, set DestB as the successor.
-  if (isCond) {
-    if (DestB == 0 && FallThru != MBB.getParent()->end()) {
-      DestB = FallThru;
-      AddedFallThrough = true;
-    }
-  } else {
-    // If this is an unconditional branch with no explicit dest, it must just be
-    // a fallthrough into DestB.
-    if (DestA == 0 && FallThru != MBB.getParent()->end()) {
-      DestA = FallThru;
-      AddedFallThrough = true;
-    }
-  }
-
-  MachineBasicBlock::succ_iterator SI = MBB.succ_begin();
-  MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
-  while (SI != MBB.succ_end()) {
-    if (*SI == DestA && DestA == DestB) {
-      DestA = DestB = 0;
-      ++SI;
-    } else if (*SI == DestA) {
-      DestA = 0;
-      ++SI;
-    } else if (*SI == DestB) {
-      DestB = 0;
-      ++SI;
-    } else if ((*SI)->isLandingPad() &&
-               *SI!=OrigDestA && *SI!=OrigDestB) {
-      ++SI;
-    } else {
-      // Otherwise, this is a superfluous edge, remove it.
-      MBB.removeSuccessor(SI);
-      MadeChange = true;
-    }
-  }
-  if (!AddedFallThrough) {
-    assert(DestA == 0 && DestB == 0 &&
-           "MachineCFG is missing edges!");
-  } else if (isCond) {
-    assert(DestA == 0 && "MachineCFG is missing edges!");
-  }
-  return MadeChange;
-}
-
-
 /// CanFallThrough - Return true if the specified block (with the specified
 /// branch condition) can implicitly transfer control to the block after it by
 /// falling off the end of it. This should return false if it can reach the
@@ -816,17 +764,21 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
 /// result in infinite loops.
 static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
-                                MachineBasicBlock *MBB2,
-                                const TargetInstrInfo &TII) {
+                                MachineBasicBlock *MBB2) {
   // Right now, we use a simple heuristic. If MBB2 ends with a call, and
   // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
   // optimize branches that branch to either a return block or an assert block
   // into a fallthrough to the return.
   if (MBB1->empty() || MBB2->empty()) return false;
+
+  // If there is a clear successor ordering we make sure that one block
+  // will fall through to the next
+  if (MBB1->isSuccessor(MBB2)) return true;
+  if (MBB2->isSuccessor(MBB1)) return false;

   MachineInstr *MBB1I = --MBB1->end();
   MachineInstr *MBB2I = --MBB2->end();
-  return TII.isCall(MBB2I->getOpcode()) && !TII.isCall(MBB1I->getOpcode());
+  return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
 }

 /// OptimizeBlock - Analyze and optimize control flow related to the specified
@@ -871,8 +823,8 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
   if (!PriorUnAnalyzable) {
     // If the CFG for the prior block has extra edges, remove them.
-    MadeChange |= CorrectExtraCFGEdges(PrevBB, PriorTBB, PriorFBB,
-                                       !PriorCond.empty(), MBB);
+    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+                                              !PriorCond.empty());

     // If the previous branch is conditional and both conditions go to the same
     // destination, remove the branch, replacing it with an unconditional one or
@@ -939,7 +891,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       // last. Only do the swap if one is clearly better to fall through than
      // the other.
      if (FallThrough == --MBB->getParent()->end() &&
-          !IsBetterFallthrough(PriorTBB, MBB, *TII))
+          !IsBetterFallthrough(PriorTBB, MBB))
        DoTransform = false;

      // We don't want to do this transformation if we have control flow like:
@@ -984,9 +936,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
   bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond);
   if (!CurUnAnalyzable) {
     // If the CFG for the prior block has extra edges, remove them.
-    MadeChange |= CorrectExtraCFGEdges(*MBB, CurTBB, CurFBB,
-                                       !CurCond.empty(),
-                                       ++MachineFunction::iterator(MBB));
+    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());

     // If this is a two-way branch, and the FBB branches to this block, reverse
     // the condition so the single-basic-block loop is faster. Instead of:
@@ -1008,7 +958,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     // If this branch is the only thing in its block, see if we can forward
     // other blocks across it.
     if (CurTBB && CurCond.empty() && CurFBB == 0 &&
-        TII->isBranch(MBB->begin()->getOpcode()) && CurTBB != MBB) {
+        MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
       // This block may contain just an unconditional branch. Because there can
       // be 'non-branch terminators' in the block, try removing the branch and
       // then seeing if the block is empty.
@@ -1040,17 +990,18 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       }

       // Iterate through all the predecessors, revectoring each in-turn.
-      MachineBasicBlock::pred_iterator PI = MBB->pred_begin();
+      size_t PI = 0;
       bool DidChange = false;
       bool HasBranchToSelf = false;
-      while (PI != MBB->pred_end()) {
-        if (*PI == MBB) {
+      while(PI != MBB->pred_size()) {
+        MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+        if (PMBB == MBB) {
           // If this block has an uncond branch to itself, leave it.
           ++PI;
           HasBranchToSelf = true;
         } else {
           DidChange = true;
-          (*PI)->ReplaceUsesOfBlockWith(MBB, CurTBB);
+          PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
         }
       }
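
For reference, a self-contained sketch (not part of the patch above) of the strict-weak-ordering point behind the MergeCompare change: a checked STL such as libstdc++ with _GLIBCXX_DEBUG may compare an element with itself when verifying the comparator passed to std::sort, so the comparator has to return false for equal inputs rather than assert. The HashAndBlock pair and MergeCompareSketch names below are hypothetical stand-ins; the real code orders std::pair<unsigned, MachineBasicBlock*> entries of MergePotentials.

// Illustrative only: plain ints stand in for basic blocks.
#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

typedef std::pair<unsigned, int> HashAndBlock;  // (tail hash, block number)

// Strict weak ordering: sort by hash, then by block number.  For equal
// elements it returns false instead of asserting, because a debug-mode STL
// may legitimately compare an object with itself.
static bool MergeCompareSketch(const HashAndBlock &p, const HashAndBlock &q) {
  if (p.first < q.first)   return true;
  if (p.first > q.first)   return false;
  if (p.second < q.second) return true;
  if (p.second > q.second) return false;
  return false;  // p and q are equal; never claim p < q here.
}

int main() {
  std::vector<HashAndBlock> MergePotentials;
  MergePotentials.push_back(std::make_pair(42u, 3));
  MergePotentials.push_back(std::make_pair(17u, 1));
  MergePotentials.push_back(std::make_pair(42u, 2));

  std::sort(MergePotentials.begin(), MergePotentials.end(), MergeCompareSketch);

  assert(MergePotentials[0].first == 17u);  // lowest hash first
  assert(MergePotentials[1].second == 2);   // equal hashes ordered by block number
  return 0;
}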