X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineBlockPlacement.cpp;h=74af1e2d64b02d2acc15c0acb98ca05a4378c2cf;hb=1ef2cec146099f4bf46c31b913c7f1538935a867;hp=1f1ce671f50740351eea3e878f5de435e2edc344;hpb=2c189061184925c6a8ecbb5a19e648b230a41c0e;p=oota-llvm.git diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 1f1ce671f50..74af1e2d64b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,7 +25,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -33,18 +37,16 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include using namespace llvm; +#define DEBUG_TYPE "block-placement2" + STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); STATISTIC(CondBranchTakenFreq, @@ -52,6 +54,18 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); +static cl::opt AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -139,7 +153,7 @@ public: #ifndef NDEBUG /// \brief Dump the blocks in this chain. - void dump() LLVM_ATTRIBUTE_USED { + LLVM_DUMP_METHOD void dump() { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->dump(); } @@ -171,7 +185,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const TargetInstrInfo *TII; /// \brief A handle to the target's lowering info. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// \brief Allocator and owner of BlockChain structures. /// @@ -193,7 +207,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); @@ -207,7 +221,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, @@ -224,9 +238,9 @@ public: initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -321,7 +335,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% - MachineBasicBlock *BestSucc = 0; + MachineBasicBlock *BestSucc = nullptr; // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle @@ -354,7 +368,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } @@ -377,8 +392,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; } } @@ -395,23 +410,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } -namespace { -/// \brief Predicate struct to detect blocks already placed. -class IsBlockPlaced { - const BlockChain &PlacedChain; - const BlockToChainMapType &BlockToChain; - -public: - IsBlockPlaced(const BlockChain &PlacedChain, - const BlockToChainMapType &BlockToChain) - : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} - - bool operator()(MachineBasicBlock *BB) const { - return BlockToChain.lookup(BB) == &PlacedChain; - } -}; -} - /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -430,10 +428,12 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - IsBlockPlaced(Chain, BlockToChain)), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); - MachineBasicBlock *BestBlock = 0; + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (SmallVectorImpl::iterator WBI = WorkList.begin(), WBE = WorkList.end(); @@ -447,8 +447,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -480,7 +480,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return *BlockToChain[I]->begin(); } } - return 0; + return nullptr; } void MachineBlockPlacement::buildChain( @@ -495,11 +495,11 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*llvm::prior(Chain.end()) == BB); + assert(*std::prev(Chain.end()) == BB); // Look for the best viable successor if there is one to place immediately // after this block. @@ -530,7 +530,7 @@ void MachineBlockPlacement::buildChain( << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " @@ -561,7 +561,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, << getBlockName(L.getHeader()) << "\n"); BlockFrequency BestPredFreq; - MachineBasicBlock *BestPred = 0; + MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), PE = L.getHeader()->pred_end(); PI != PE; ++PI) { @@ -569,8 +569,8 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " - << Pred->succ_size() << " successors, " - << MBFI->getBlockFreq(Pred) << " freq\n"); + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -617,11 +617,11 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // header and only rotate if safe. BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return 0; + return nullptr; BlockFrequency BestExitEdgeFreq; unsigned BestExitLoopDepth = 0; - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunites unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -635,7 +635,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockChain &Chain = *BlockToChain[*I]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *llvm::prior(Chain.end())) + if (*I != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -684,14 +684,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -707,14 +710,14 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return 0; + return nullptr; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return 0; + return nullptr; DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); return ExitingBB; @@ -739,7 +742,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, PI != PE; ++PI) { BlockChain *PredChain = BlockToChain[*PI]; if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + (!PredChain || *PI == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -749,7 +752,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), SE = Bottom->succ_end(); SI != SE; ++SI) { @@ -765,7 +768,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -793,7 +796,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; if (LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); @@ -881,11 +884,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the exact fallthrough behavior for. for (;;) { Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(llvm::next(FI)); + MachineFunction::iterator NextFI(std::next(FI)); MachineBasicBlock *NextBB = NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. @@ -893,7 +896,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); - Chain->merge(NextBB, 0); + Chain->merge(NextBB, nullptr); FI = NextFI; BB = NextBB; } @@ -933,7 +936,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; @@ -977,14 +982,36 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. if (BI == FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = nullptr; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = nullptr; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -997,14 +1024,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } } // Fixup the last block. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); @@ -1013,7 +1042,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) + if (F.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) @@ -1024,7 +1054,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), + for (BlockChain::iterator BI = std::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { // Don't align non-looping basic blocks. These are unlikely to execute @@ -1050,7 +1080,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *llvm::prior(BI); + MachineBasicBlock *LayoutPred = *std::prev(BI); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. @@ -1060,7 +1090,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // Align this block if the layout predecessor's edge into this block is - // cold relative to the block. When this is true, othe predecessors make up + // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); @@ -1072,7 +1102,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) + return false; + + if (skipOptnoneFunction(*F.getFunction())) return false; MBPI = &getAnalysis(); @@ -1087,6 +1120,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BlockToChain.clear(); ChainAllocator.DestroyAll(); + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + FI->setAlignment(AlignAllBlock); + // We always return true as we have no way to track whether the final order // differs from the original order. return true; @@ -1112,9 +1151,9 @@ public: initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); @@ -1134,7 +1173,7 @@ INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis();