Add getUnrollingPreferences to TTI
[oota-llvm.git] / include / llvm / Analysis / BlockFrequencyImpl.h
index 6580fd1e4a9c6ea3467daa7aa2a2605d51d0e974..817a44188b894a11005c431e637075f664819748 100644 (file)
@@ -1,4 +1,4 @@
-//===---- BlockFrequencyImpl.h - Machine Block Frequency Implementation ---===//
+//===-- BlockFrequencyImpl.h - Block Frequency Implementation --*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
 #define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
 
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <vector>
-#include <sstream>
 #include <string>
+#include <vector>
 
 namespace llvm {
 
 
-class BlockFrequency;
-class MachineBlockFrequency;
+class BlockFrequencyInfo;
+class MachineBlockFrequencyInfo;
 
 /// BlockFrequencyImpl implements block frequency algorithm for IR and
-/// Machine Instructions. Algorithm starts with value 1024 (START_FREQ)
+/// Machine Instructions. Algorithm starts with value ENTRY_FREQ
 /// for the entry block and then propagates frequencies using branch weights
 /// from (Machine)BranchProbabilityInfo. LoopInfo is not required because
 /// algorithm can find "backedges" by itself.
 template<class BlockT, class FunctionT, class BlockProbInfoT>
 class BlockFrequencyImpl {
 
-  DenseMap<BlockT *, uint32_t> Freqs;
+  DenseMap<const BlockT *, BlockFrequency> Freqs;
 
   BlockProbInfoT *BPI;
 
@@ -48,72 +48,53 @@ class BlockFrequencyImpl {
 
   typedef GraphTraits< Inverse<BlockT *> > GT;
 
-  static const uint32_t START_FREQ = 1024;
+  const uint32_t EntryFreq;
 
   std::string getBlockName(BasicBlock *BB) const {
-    return BB->getNameStr();
+    return BB->getName().str();
   }
 
   std::string getBlockName(MachineBasicBlock *MBB) const {
-    std::stringstream ss;
+    std::string str;
+    raw_string_ostream ss(str);
     ss << "BB#" << MBB->getNumber();
 
     if (const BasicBlock *BB = MBB->getBasicBlock())
-      ss << " derived from LLVM BB " << BB->getNameStr();
+      ss << " derived from LLVM BB " << BB->getName();
 
     return ss.str();
   }
 
-  void setBlockFreq(BlockT *BB, uint32_t Freq) {
+  void setBlockFreq(BlockT *BB, BlockFrequency Freq) {
     Freqs[BB] = Freq;
     DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = " << Freq << "\n");
   }
 
   /// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst
   /// edge probability.
-  uint32_t getEdgeFreq(BlockT *Src, BlockT *Dst) const {
+  BlockFrequency getEdgeFreq(BlockT *Src, BlockT *Dst) const {
     BranchProbability Prob = BPI->getEdgeProbability(Src, Dst);
-    uint64_t N = Prob.getNumerator();
-    uint64_t D = Prob.getDenominator();
-    uint64_t Res = (N * getBlockFreq(Src)) / D;
-
-    assert(Res <= UINT32_MAX);
-    return (uint32_t) Res;
+    return getBlockFreq(Src) * Prob;
   }
 
   /// incBlockFreq - Increase BB block frequency by FREQ.
   ///
-  void incBlockFreq(BlockT *BB, uint32_t Freq) {
+  void incBlockFreq(BlockT *BB, BlockFrequency Freq) {
     Freqs[BB] += Freq;
     DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += " << Freq
                  << " --> " << Freqs[BB] << "\n");
   }
 
-  /// divBlockFreq - Divide BB block frequency by PROB. If Prob = 0 do nothing.
-  ///
-  void divBlockFreq(BlockT *BB, BranchProbability Prob) {
-    uint64_t N = Prob.getNumerator();
-    assert(N && "Illegal division by zero!");
-    uint64_t D = Prob.getDenominator();
-    uint64_t Freq = (Freqs[BB] * D) / N;
-
-    // Should we assert it?
-    if (Freq > UINT32_MAX)
-      Freq = UINT32_MAX;
-
-    Freqs[BB] = (uint32_t) Freq;
-    DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob
-                 << ") --> " << Freqs[BB] << "\n");
-  }
-
   // All blocks in postorder.
   std::vector<BlockT *> POT;
 
   // Map Block -> Position in reverse-postorder list.
   DenseMap<BlockT *, unsigned> RPO;
 
-  // Cycle Probability for each bloch.
-  DenseMap<BlockT *, uint32_t> CycleProb;
+  // For each loop header, record the per-iteration probability of exiting the
+  // loop. This is the reciprocal of the expected number of loop iterations.
+  typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap;
+  LoopExitProbMap LoopExitProb;
 
   // (reverse-)postorder traversal iterators.
   typedef typename std::vector<BlockT *>::iterator pot_iterator;
@@ -127,7 +108,7 @@ class BlockFrequencyImpl {
 
   rpot_iterator rpot_at(BlockT *BB) {
     rpot_iterator I = rpot_begin();
-    unsigned idx = RPO[BB];
+    unsigned idx = RPO.lookup(BB);
     assert(idx);
     std::advance(I, idx - 1);
 
@@ -135,32 +116,15 @@ class BlockFrequencyImpl {
     return I;
   }
 
-
-  /// Return a probability of getting to the DST block through SRC->DST edge.
-  ///
-  BranchProbability getBackEdgeProbability(BlockT *Src, BlockT *Dst) const {
-    uint32_t N = getEdgeFreq(Src, Dst);
-    uint32_t D = getBlockFreq(Dst);
-
-    return BranchProbability(N, D);
-  }
-
-  /// isReachable - Returns if BB block is reachable from the entry.
+  /// isBackedge - Return if edge Src -> Dst is a reachable backedge.
   ///
-  bool isReachable(BlockT *BB) {
-    return RPO.count(BB);
-  }
-
-  /// isBackedge - Return if edge Src -> Dst is a backedge.
-  ///
-  bool isBackedge(BlockT *Src, BlockT *Dst) {
-    assert(isReachable(Src));
-    assert(isReachable(Dst));
-
-    unsigned a = RPO[Src];
-    unsigned b = RPO[Dst];
-
-    return a > b;
+  bool isBackedge(BlockT *Src, BlockT *Dst) const {
+    unsigned a = RPO.lookup(Src);
+    if (!a)
+      return false;
+    unsigned b = RPO.lookup(Dst);
+    assert(b && "Destination block should be reachable");
+    return a >= b;
   }
 
   /// getSingleBlockPred - return single BB block predecessor or NULL if
@@ -189,7 +153,7 @@ class BlockFrequencyImpl {
     setBlockFreq(BB, 0);
 
     if (BB == LoopHead) {
-      setBlockFreq(BB, START_FREQ);
+      setBlockFreq(BB, EntryFreq);
       return;
     }
 
@@ -209,7 +173,7 @@ class BlockFrequencyImpl {
          PI != PE; ++PI) {
       BlockT *Pred = *PI;
 
-      if (isReachable(Pred) && isBackedge(Pred, BB)) {
+      if (isBackedge(Pred, BB)) {
         isLoopHead = true;
       } else if (BlocksInLoop.count(Pred)) {
         incBlockFreq(BB, getEdgeFreq(Pred, BB));
@@ -224,47 +188,82 @@ class BlockFrequencyImpl {
     if (!isLoopHead)
       return;
 
-    assert(START_FREQ >= CycleProb[BB]);
-    uint32_t CProb = CycleProb[BB];
-    uint32_t Numerator = START_FREQ - CProb ? START_FREQ - CProb : 1;
-    divBlockFreq(BB, BranchProbability(Numerator, START_FREQ));
+    // This block is a loop header, so boost its frequency by the expected
+    // number of loop iterations. The loop blocks will be revisited so they all
+    // get this boost.
+    typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
+    assert(I != LoopExitProb.end() && "Loop header missing from table");
+    Freqs[BB] /= I->second;
+    DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
   }
 
-  /// doLoop - Propagate block frequency down throught the loop.
+  /// doLoop - Propagate block frequency down through the loop.
   void doLoop(BlockT *Head, BlockT *Tail) {
     DEBUG(dbgs() << "doLoop(" << getBlockName(Head) << ", "
                  << getBlockName(Tail) << ")\n");
 
     SmallPtrSet<BlockT *, 8> BlocksInLoop;
 
-    for (rpot_iterator I = rpot_at(Head), E = rpot_end(); I != E; ++I) {
+    for (rpot_iterator I = rpot_at(Head), E = rpot_at(Tail); ; ++I) {
       BlockT *BB = *I;
       doBlock(BB, Head, BlocksInLoop);
 
       BlocksInLoop.insert(BB);
+      if (I == E)
+        break;
     }
 
     // Compute loop's cyclic probability using backedges probabilities.
+    BlockFrequency BackFreq;
     for (typename GT::ChildIteratorType
          PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head),
          PE = GraphTraits< Inverse<BlockT *> >::child_end(Head);
          PI != PE; ++PI) {
       BlockT *Pred = *PI;
       assert(Pred);
-      if (isReachable(Pred) && isBackedge(Pred, Head)) {
-        BranchProbability Prob = getBackEdgeProbability(Pred, Head);
-        uint64_t N = Prob.getNumerator();
-        uint64_t D = Prob.getDenominator();
-        uint64_t Res = (N * START_FREQ) / D;
-
-        assert(Res <= UINT32_MAX);
-        CycleProb[Head] += (uint32_t) Res;
-      }
+      if (isBackedge(Pred, Head))
+        BackFreq += getEdgeFreq(Pred, Head);
     }
+
+    // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head)
+    // only counts edges entering the loop, not the loop backedges.
+    // The probability of leaving the loop on each iteration is:
+    //
+    //   ExitProb = 1 - CyclicProb
+    //
+    // The Expected number of loop iterations is:
+    //
+    //   Iterations = 1 / ExitProb
+    //
+    uint64_t D = std::max(getBlockFreq(Head).getFrequency(), UINT64_C(1));
+    uint64_t N = std::max(BackFreq.getFrequency(), UINT64_C(1));
+    if (N < D)
+      N = D - N;
+    else
+      // We'd expect N < D, but rounding and saturation means that can't be
+      // guaranteed.
+      N = 1;
+
+    // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction.
+    assert(N <= D);
+    if (D > UINT32_MAX) {
+      unsigned Shift = 32 - countLeadingZeros(D);
+      D >>= Shift;
+      N >>= Shift;
+      if (N == 0)
+        N = 1;
+    }
+    BranchProbability LEP = BranchProbability(N, D);
+    LoopExitProb.insert(std::make_pair(Head, LEP));
+    DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
+                 << " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
+                 << ".\n");
   }
 
-  friend class BlockFrequency;
-  friend class MachineBlockFrequency;
+  friend class BlockFrequencyInfo;
+  friend class MachineBlockFrequencyInfo;
+
+  BlockFrequencyImpl() : EntryFreq(BlockFrequency::getEntryFrequency()) { }
 
   void doFunction(FunctionT *fn, BlockProbInfoT *bpi) {
     Fn = fn;
@@ -273,12 +272,12 @@ class BlockFrequencyImpl {
     // Clear everything.
     RPO.clear();
     POT.clear();
-    CycleProb.clear();
+    LoopExitProb.clear();
     Freqs.clear();
 
     BlockT *EntryBlock = fn->begin();
 
-    copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT));
+    std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT));
 
     unsigned RPOidx = 0;
     for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
@@ -299,8 +298,7 @@ class BlockFrequencyImpl {
            PI != PE; ++PI) {
 
         BlockT *Pred = *PI;
-        if (isReachable(Pred) && isBackedge(Pred, BB)
-            && (!LastTail || RPO[Pred] > RPO[LastTail]))
+        if (isBackedge(Pred, BB) && (!LastTail || RPO[Pred] > RPO[LastTail]))
           LastTail = Pred;
       }
 
@@ -314,13 +312,13 @@ class BlockFrequencyImpl {
   }
 
 public:
-  /// getBlockFreq - Return block frequency. Never return 0, value must be
-  /// positive.
-  uint32_t getBlockFreq(BlockT *BB) const {
-    typename DenseMap<BlockT *, uint32_t>::const_iterator I = Freqs.find(BB);
+  /// getBlockFreq - Return block frequency. Return 0 if we don't have it.
+  BlockFrequency getBlockFreq(const BlockT *BB) const {
+    typename DenseMap<const BlockT *, BlockFrequency>::const_iterator
+      I = Freqs.find(BB);
     if (I != Freqs.end())
-      return I->second ? I->second : 1;
-    return 1;
+      return I->second;
+    return 0;
   }
 
   void print(raw_ostream &OS) const {