X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineBlockPlacement.cpp;h=75745e5cb005c6fdab773f4b440b22d9ed5ce2c5;hb=d013411d966226efc5ff69cd5a74b2130156de7b;hp=511a55821d5567027c4f996efa55bbbc9d315197;hpb=16295fc20b68f9a9318cada4e4d96e964b1cdd7e;p=oota-llvm.git diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 511a55821d5..75745e5cb00 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -11,7 +11,7 @@ // structure and branch probability estimates. // // The pass strives to preserve the structure of the CFG (that is, retain -// a topological ordering of basic blocks) in the absense of a *strong* signal +// a topological ordering of basic blocks) in the absence of a *strong* signal // to the contrary from probabilities. However, within the CFG structure, it // attempts to choose an ordering which favors placing more likely sequences of // blocks adjacent to each other. 
@@ -26,6 +26,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -33,13 +38,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include @@ -52,6 +53,18 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); +static cl::opt AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -63,17 +76,13 @@ namespace { /// /// This is the datastructure representing a chain of consecutive blocks that /// are profitable to layout together in order to maximize fallthrough -/// probabilities. 
We also can use a block chain to represent a sequence of -/// basic blocks which have some external (correctness) requirement for -/// sequential layout. +/// probabilities and code locality. We also can use a block chain to represent +/// a sequence of basic blocks which have some external (correctness) +/// requirement for sequential layout. /// -/// Eventually, the block chains will form a directed graph over the function. -/// We provide an SCC-supporting-iterator in order to quicky build and walk the -/// SCCs of block chains within a function. -/// -/// The block chains also have support for calculating and caching probability -/// information related to the chain itself versus other chains. This is used -/// for ranking during the final layout of block chains. +/// Chains can be built around a single basic block and can be merged to grow +/// them. They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. class BlockChain { /// \brief The sequence of blocks belonging to this chain. /// @@ -143,7 +152,7 @@ public: #ifndef NDEBUG /// \brief Dump the blocks in this chain. - void dump() LLVM_ATTRIBUTE_USED { + LLVM_DUMP_METHOD void dump() { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->dump(); } @@ -175,14 +184,15 @@ class MachineBlockPlacement : public MachineFunctionPass { const TargetInstrInfo *TII; /// \brief A handle to the target's lowering info. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// \brief Allocator and owner of BlockChain structures. /// - /// We build BlockChains lazily by merging together high probability BB - /// sequences acording to the "Algo2" in the paper mentioned at the top of - /// the file. To reduce malloc traffic, we allocate them using this slab-like - /// allocator, and destroy them after the pass completes. + /// We build BlockChains lazily while processing the loop structure of + /// a function. 
To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. SpecificBumpPtrAllocator ChainAllocator; /// \brief Function wide BasicBlock to BlockChain mapping. @@ -211,6 +221,8 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -225,9 +237,9 @@ public: initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -327,7 +339,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle // definition of edge weight semantics is encoded here as well. We should - // improve the MBPI interface to effeciently support query patterns such as + // improve the MBPI interface to efficiently support query patterns such as // this. uint32_t BestWeight = 0; uint32_t WeightScale = 0; @@ -355,7 +367,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints. 
if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } @@ -378,8 +391,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; } } @@ -396,23 +409,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } -namespace { -/// \brief Predicate struct to detect blocks already placed. -class IsBlockPlaced { - const BlockChain &PlacedChain; - const BlockToChainMapType &BlockToChain; - -public: - IsBlockPlaced(const BlockChain &PlacedChain, - const BlockToChainMapType &BlockToChain) - : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} - - bool operator()(MachineBasicBlock *BB) const { - return BlockToChain.lookup(BB) == &PlacedChain; - } -}; -} - /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -431,7 +427,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. 
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - IsBlockPlaced(Chain, BlockToChain)), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = 0; @@ -448,8 +446,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -496,16 +494,15 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*llvm::prior(Chain.end()) == BB); - MachineBasicBlock *BestSucc = 0; + assert(*std::prev(Chain.end()) == BB); // Look for the best viable successor if there is one to place immediately // after this block. - BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -532,7 +529,7 @@ void MachineBlockPlacement::buildChain( << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " @@ -541,6 +538,67 @@ void MachineBlockPlacement::buildChain( /// \brief Find the best loop top block for layout. 
/// +/// Look for a block which is strictly better than the loop header for laying +/// out at the top of the loop. This looks for one and only one pattern: +/// a latch block with no conditional exit. This block will cause a conditional +/// jump around it or will be the bottom of the loop if we lay it out in place, +/// but if it doesn't end up at the bottom of the loop for any reason, +/// rotation alone won't fix it. Because such a block will always result in an +/// unconditional jump (for the backedge) rotating it in front of the loop +/// header is always profitable. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Check that the header hasn't been fused with a preheader block due to + // crazy branches. If it has, we need to start with the header at the top to + // prevent pulling the preheader into the loop body. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + DEBUG(dbgs() << "Finding best loop top for: " + << getBlockName(L.getHeader()) << "\n"); + + BlockFrequency BestPredFreq; + MachineBasicBlock *BestPred = 0; + for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), + PE = L.getHeader()->pred_end(); + PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (!LoopBlockSet.count(Pred)) + continue; + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + if (Pred->succ_size() > 1) + continue; + + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + if (!BestPred || PredFreq > BestPredFreq || + (!(PredFreq < BestPredFreq) && + Pred->isLayoutSuccessor(L.getHeader()))) { + BestPred = Pred; + BestPredFreq = PredFreq; + } + } + + // If no direct predecessor is fine, just use the loop header. 
+ if (!BestPred) + return L.getHeader(); + + // Walk backwards through any straight line of predecessors. + while (BestPred->pred_size() == 1 && + (*BestPred->pred_begin())->succ_size() == 1 && + *BestPred->pred_begin() != L.getHeader()) + BestPred = *BestPred->pred_begin(); + + DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + return BestPred; +} + + +/// \brief Find the best loop exiting block for layout. +/// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. @@ -576,7 +634,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockChain &Chain = *BlockToChain[*I]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *llvm::prior(Chain.end())) + if (*I != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -625,14 +683,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. 
+ BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -680,7 +741,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, PI != PE; ++PI) { BlockChain *PredChain = BlockToChain[*PI]; if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + (!PredChain || *PI == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -690,7 +751,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), SE = Bottom->succ_end(); SI != SE; ++SI) { @@ -706,7 +767,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -725,8 +786,20 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - MachineBasicBlock *ExitingBB = findBestLoopExit(F, L, LoopBlockSet); - BlockChain &LoopChain = *BlockToChain[L.getHeader()]; + // First check to see if there is an obviously preferable top block for the + // loop. This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. 
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. + MachineBasicBlock *ExitingBB = 0; + if (LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain @@ -758,7 +831,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*Chain.begin()); } - buildChain(L.getHeader(), LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ @@ -814,7 +887,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(llvm::next(FI)); + MachineFunction::iterator NextFI(std::next(FI)); MachineBasicBlock *NextBB = NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. @@ -862,7 +935,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; @@ -906,15 +981,53 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. 
if (BI == FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) - PrevBB->updateTerminator(); + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explicit "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = 0; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = 0; + } + } + + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. 
+ if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; + } + if (needUpdateBr) + PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -925,35 +1038,70 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. - if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. + if (F.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. 
- SmallPtrSet PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = std::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *std::prev(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. 
When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis(); @@ -968,6 +1116,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BlockToChain.clear(); ChainAllocator.DestroyAll(); + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + FI->setAlignment(AlignAllBlock); + // We always return true as we have no way to track whether the final order // differs from the original order. return true; @@ -978,7 +1132,7 @@ namespace { /// /// A separate pass to compute interesting statistics for evaluating block /// placement. This is separate from the actual placement pass so that they can -/// be computed in the absense of any placement transformations or when using +/// be computed in the absence of any placement transformations or when using /// alternative placement strategies. class MachineBlockPlacementStats : public MachineFunctionPass { /// \brief A handle to the branch probability pass. 
@@ -993,9 +1147,9 @@ public: initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); @@ -1015,7 +1169,7 @@ INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis();