#define LLVM_SUPPORT_BRANCHPROBABILITY_H
#include "llvm/Support/DataTypes.h"
+#include <algorithm>
#include <cassert>
+#include <climits>
+#include <numeric>
namespace llvm {
template <class ProbabilityList>
static void normalizeProbabilities(ProbabilityList &Probs);
+  // Normalize a list of weights by scaling them down so that the sum of them
+  // doesn't exceed UINT32_MAX. Zero weights are replaced with 1 afterwards.
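+  // For example, {0, UINT32_MAX} is normalized to {1, UINT32_MAX - 1}.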
+ template <class WeightListIter>
+ static void normalizeEdgeWeights(WeightListIter Begin, WeightListIter End);
+
uint32_t getNumerator() const { return N; }
static uint32_t getDenominator() { return D; }
Prob.N = (Prob.N * uint64_t(D) + Sum / 2) / Sum;
}
+template <class WeightListIter>
+void BranchProbability::normalizeEdgeWeights(WeightListIter Begin,
+ WeightListIter End) {
+  // First we compute the sum with 64 bits of precision.
+ uint64_t Sum = std::accumulate(Begin, End, uint64_t(0));
+
+ if (Sum > UINT32_MAX) {
+ // Compute the scale necessary to cause the weights to fit, and re-sum with
+ // that scale applied.
+ assert(Sum / UINT32_MAX < UINT32_MAX &&
+ "The sum of weights exceeds UINT32_MAX^2!");
+ uint32_t Scale = Sum / UINT32_MAX + 1;
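+    // E.g., a sum of 3 * UINT32_MAX yields a scale of 4, after which the new
+    // sum is at most (3 / 4) * UINT32_MAX.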
+ for (auto I = Begin; I != End; ++I)
+ *I /= Scale;
+ Sum = std::accumulate(Begin, End, uint64_t(0));
+ }
+
+ // Eliminate zero weights.
+ auto ZeroWeightNum = std::count(Begin, End, 0u);
+ if (ZeroWeightNum > 0) {
+    // If all weights are zero, replace them with 1.
+ if (Sum == 0)
+ std::fill(Begin, End, 1u);
+ else {
+      // We are converting zeros into ones, so we need to make sure that the
+      // resulting sum won't exceed UINT32_MAX.
+ if (Sum + ZeroWeightNum > UINT32_MAX) {
+ for (auto I = Begin; I != End; ++I)
+ *I /= 2;
+ ZeroWeightNum = std::count(Begin, End, 0u);
+ Sum = std::accumulate(Begin, End, uint64_t(0));
+ }
+ // Scale up non-zero weights and turn zero weights into ones.
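+      // E.g., {0, UINT32_MAX} is first halved to {0, UINT32_MAX / 2} above,
+      // scaled by 2 here, and becomes {1, UINT32_MAX - 1} after the
+      // replacement below.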
+ uint64_t ScalingFactor = (UINT32_MAX - ZeroWeightNum) / Sum;
+      assert(ScalingFactor >= 1 && "Scaling factor should be at least 1!");
+ if (ScalingFactor > 1)
+ for (auto I = Begin; I != End; ++I)
+ *I *= ScalingFactor;
+ std::replace(Begin, End, 0u, 1u);
+ }
+ }
+}
+
}
#endif
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
#define DEBUG_TYPE "jump-threading"
class JumpThreading : public FunctionPass {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
+ void releaseMemory() override {
+ BFI.reset();
+ BPI.reset();
+ }
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+
+ private:
+ BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix);
+ void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
+ BasicBlock *NewBB, BasicBlock *SuccBB);
};
}
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
+ BFI.reset();
+ BPI.reset();
+  // When profile data is available, we need to update edge weights after
+  // successful jump threading, which requires both BPI and BFI to be
+  // available.
+ HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
// Remove unreachable blocks from function as they may result in infinite
// loop. We do threading if we found something profitable. Jump threading a
}
// Split them out to their own block.
- UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
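+    // NewBB's only predecessor will be PredBB, so its frequency is that of
+    // the threaded edge PredBB->BB.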
+ auto NewBBFreq =
+ BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TLI);
+  // Update the block frequency of BB and the edge weight from BB to SuccBB,
+  // which should be smaller than before.
+ UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+
// Threaded an edge!
++NumThreads;
return true;
}
+/// Create a new basic block that will be the predecessor of BB and successor
+/// of all blocks in Preds. When profile data is available, update the
+/// frequency of this new block.
+BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix) {
+ // Collect the frequencies of all predecessors of BB, which will be used to
+ // update the edge weight on BB->SuccBB.
+ BlockFrequency PredBBFreq(0);
+ if (HasProfileData)
+ for (auto Pred : Preds)
+ PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
+
+ BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
+
+ // Set the block frequency of the newly created PredBB, which is the sum of
+ // frequencies of Preds.
+ if (HasProfileData)
+ BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
+ return PredBB;
+}
+
+/// Update the block frequency of BB and the branch weight and metadata on the
+/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by
+/// 1 - Freq(PredBB->BB) / Freq(BB->SuccBB).
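+/// Since Freq(NewBB) equals Freq(PredBB->BB) by construction, the scaling is
+/// implemented below as the subtraction Freq(BB->SuccBB) - Freq(NewBB).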
+void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *NewBB,
+ BasicBlock *SuccBB) {
+ if (!HasProfileData)
+ return;
+
+  assert(BFI && BPI && "BFI & BPI should have been created earlier");
+
+ // As the edge from PredBB to BB is deleted, we have to update the block
+ // frequency of BB.
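+  // Its new frequency is its old frequency minus NewBB's, since the flow that
+  // used to enter BB through PredBB now enters NewBB instead.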
+ auto BBOrigFreq = BFI->getBlockFreq(BB);
+ auto NewBBFreq = BFI->getBlockFreq(NewBB);
+ auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
+ auto BBNewFreq = BBOrigFreq - NewBBFreq;
+ BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
+
+ // Collect updated outgoing edges' frequencies from BB and use them to update
+ // edge weights.
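+  // For SuccBB, the flow diverted through NewBB is subtracted; every other
+  // outgoing edge keeps its original frequency.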
+ SmallVector<uint64_t, 4> BBSuccFreq;
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ auto SuccFreq = (*I == SuccBB)
+ ? BB2SuccBBFreq - NewBBFreq
+ : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ BBSuccFreq.push_back(SuccFreq.getFrequency());
+ }
+
+  // Normalize the frequencies in BBSuccFreq so that their sum does not exceed
+  // UINT32_MAX and they can be used as 32-bit edge weights below.
+ BranchProbability::normalizeEdgeWeights(BBSuccFreq.begin(), BBSuccFreq.end());
+
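+  // After normalization each frequency fits in 32 bits, so the narrowing
+  // casts below are lossless.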
+ SmallVector<uint32_t, 4> Weights;
+ for (auto Freq : BBSuccFreq)
+ Weights.push_back(static_cast<uint32_t>(Freq));
+
+ // Update edge weights in BPI.
+  for (unsigned I = 0, E = Weights.size(); I != E; ++I)
+ BPI->setEdgeWeight(BB, I, Weights[I]);
+
+ if (Weights.size() >= 2) {
+ auto TI = BB->getTerminator();
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
+ }
+}
+
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
}
}
+TEST(BranchProbabilityTest, NormalizeEdgeWeights) {
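+  // Exercise each normalization path: all-zero weights, zeros mixed with
+  // large weights, and sums that overflow uint32_t.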
+ {
+ SmallVector<uint32_t, 2> Weights{0, 0};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(1u, Weights[0]);
+ EXPECT_EQ(1u, Weights[1]);
+ }
+ {
+ SmallVector<uint32_t, 2> Weights{0, UINT32_MAX};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(1u, Weights[0]);
+ EXPECT_EQ(UINT32_MAX - 1u, Weights[1]);
+ }
+ {
+ SmallVector<uint32_t, 2> Weights{1, UINT32_MAX};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(1u, Weights[0]);
+ EXPECT_EQ(UINT32_MAX - 1u, Weights[1]);
+ }
+ {
+ SmallVector<uint32_t, 3> Weights{0, 0, UINT32_MAX};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(1u, Weights[0]);
+ EXPECT_EQ(1u, Weights[1]);
+ EXPECT_EQ(UINT32_MAX / 2u, Weights[2]);
+ }
+ {
+ SmallVector<uint32_t, 2> Weights{UINT32_MAX, UINT32_MAX};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(UINT32_MAX / 3u, Weights[0]);
+ EXPECT_EQ(UINT32_MAX / 3u, Weights[1]);
+ }
+ {
+ SmallVector<uint32_t, 3> Weights{UINT32_MAX, UINT32_MAX, UINT32_MAX};
+ BranchProbability::normalizeEdgeWeights(Weights.begin(), Weights.end());
+ EXPECT_EQ(UINT32_MAX / 4u, Weights[0]);
+ EXPECT_EQ(UINT32_MAX / 4u, Weights[1]);
+ EXPECT_EQ(UINT32_MAX / 4u, Weights[2]);
+ }
+}
+
}