X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=include%2Fllvm%2FAnalysis%2FBlockFrequencyInfoImpl.h;h=6c101a63e128c77ad5e1af8e51e6c7cb28c10183;hb=00552e3875ee5f382db6c98286a241a7d0efe1b8;hp=618b6e33663029d45af12e533cda0186caf5efa6;hpb=747b62f119668e2c4d8622242fbc4b581c34084e;p=oota-llvm.git

diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 618b6e33663..6c101a63e12 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -31,498 +31,25 @@
 
 #define DEBUG_TYPE "block-freq"
 
-//===----------------------------------------------------------------------===//
-//
-// ScaledNumber definition.
-//
-// TODO: Move to include/llvm/Support/ScaledNumber.h
-//
-//===----------------------------------------------------------------------===//
 namespace llvm {
 
-class ScaledNumberBase {
-public:
-  static const int32_t MaxScale = 16383;
-  static const int32_t MinScale = -16382;
-  static const int DefaultPrecision = 10;
-
-  static void dump(uint64_t D, int16_t E, int Width);
-  static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width,
-                            unsigned Precision);
-  static std::string toString(uint64_t D, int16_t E, int Width,
-                              unsigned Precision);
-  static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); }
-  static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); }
-  static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); }
-
-  static std::pair<uint64_t, bool> splitSigned(int64_t N) {
-    if (N >= 0)
-      return std::make_pair(N, false);
-    uint64_t Unsigned = N == INT64_MIN ? UINT64_C(1) << 63 : uint64_t(-N);
-    return std::make_pair(Unsigned, true);
-  }
-  static int64_t joinSigned(uint64_t U, bool IsNeg) {
-    if (U > uint64_t(INT64_MAX))
-      return IsNeg ? INT64_MIN : INT64_MAX;
-    return IsNeg ? -int64_t(U) : int64_t(U);
-  }
-};
-
-/// \brief Simple representation of a scaled number.
-///
-/// ScaledNumber is a number represented by digits and a scale.  It uses simple
-/// saturation arithmetic and every operation is well-defined for every value.
-/// It's somewhat similar in behaviour to a soft-float, but is *not* a
-/// replacement for one.  If you're doing numerics, look at \a APFloat instead.
-/// Nevertheless, we've found these semantics useful for modelling certain cost
-/// metrics.
-///
-/// The number is split into a signed scale and unsigned digits.  The number
-/// represented is \c getDigits()*2^getScale().  In this way, the digits are
-/// much like the mantissa in the x87 long double, but there is no canonical
-/// form so the same number can be represented by many bit representations.
-///
-/// ScaledNumber is templated on the underlying integer type for digits, which
-/// is expected to be unsigned.
-///
-/// Unlike APFloat, ScaledNumber does not model architecture floating point
-/// behaviour -- while this might make it a little faster and easier to reason
-/// about, it certainly makes it more dangerous for general numerics.
-///
-/// ScaledNumber is totally ordered.  However, there is no canonical form, so
-/// there are multiple representations of most scalars.  E.g.:
-///
-///     ScaledNumber(8u, 0) == ScaledNumber(4u, 1)
-///     ScaledNumber(4u, 1) == ScaledNumber(2u, 2)
-///     ScaledNumber(2u, 2) == ScaledNumber(1u, 3)
-///
-/// ScaledNumber implements most arithmetic operations.  Precision is kept
-/// where possible.  Uses simple saturation arithmetic, so that operations
-/// saturate to 0.0 or getLargest() rather than under or overflowing.  It has
-/// some extra arithmetic for unit inversion.  0.0/0.0 is defined to be 0.0.
-/// Any other division by 0.0 is defined to be getLargest().
-///
-/// As a convenience for modifying the exponent, left and right shifting are
-/// both implemented, and both interpret negative shifts as positive shifts in
-/// the opposite direction.
-///
-/// Scales are limited to the range accepted by x87 long double.  This makes
-/// it trivial to add functionality to convert to APFloat (this is already
-/// relied on for the implementation of printing).
-///
-/// Possible (and conflicting) future directions:
-///
-///  1. Turn this into a wrapper around \a APFloat.
-///  2. Share the algorithm implementations with \a APFloat.
-///  3. Allow \a ScaledNumber to represent a signed number.
-template <class DigitsT> class ScaledNumber : ScaledNumberBase {
-public:
-  static_assert(!std::numeric_limits<DigitsT>::is_signed,
-                "only unsigned floats supported");
-
-  typedef DigitsT DigitsType;
-
-private:
-  typedef std::numeric_limits<DigitsType> DigitsLimits;
-
-  static const int Width = sizeof(DigitsType) * 8;
-  static_assert(Width <= 64, "invalid integer width for digits");
-
-private:
-  DigitsType Digits;
-  int16_t Scale;
-
-public:
-  ScaledNumber() : Digits(0), Scale(0) {}
-
-  ScaledNumber(DigitsType Digits, int16_t Scale)
-      : Digits(Digits), Scale(Scale) {}
-
-private:
-  ScaledNumber(const std::pair<uint64_t, int16_t> &X)
-      : Digits(X.first), Scale(X.second) {}
-
-public:
-  static ScaledNumber getZero() { return ScaledNumber(0, 0); }
-  static ScaledNumber getOne() { return ScaledNumber(1, 0); }
-  static ScaledNumber getLargest() {
-    return ScaledNumber(DigitsLimits::max(), MaxScale);
-  }
-  static ScaledNumber getFloat(uint64_t N) { return adjustToWidth(N, 0); }
-  static ScaledNumber getInverseFloat(uint64_t N) {
-    return getFloat(N).invert();
-  }
-  static ScaledNumber getFraction(DigitsType N, DigitsType D) {
-    return getQuotient(N, D);
-  }
-
-  int16_t getScale() const { return Scale; }
-  DigitsType getDigits() const { return Digits; }
-
-  /// \brief Convert to the given integer type.
-  ///
-  /// Convert to \c IntT using simple saturating arithmetic, truncating if
-  /// necessary.
-  template <class IntT> IntT toInt() const;
-
-  bool isZero() const { return !Digits; }
-  bool isLargest() const { return *this == getLargest(); }
-  bool isOne() const {
-    if (Scale > 0 || Scale <= -Width)
-      return false;
-    return Digits == DigitsType(1) << -Scale;
-  }
-
-  /// \brief The log base 2, rounded.
-  ///
-  /// Get the lg of the scalar.  lg 0 is defined to be INT32_MIN.
-  int32_t lg() const { return ScaledNumbers::getLg(Digits, Scale); }
-
-  /// \brief The log base 2, rounded towards INT32_MIN.
-  ///
-  /// Get the lg floor.  lg 0 is defined to be INT32_MIN.
-  int32_t lgFloor() const { return ScaledNumbers::getLgFloor(Digits, Scale); }
-
-  /// \brief The log base 2, rounded towards INT32_MAX.
-  ///
-  /// Get the lg ceiling.  lg 0 is defined to be INT32_MIN.
-  int32_t lgCeiling() const {
-    return ScaledNumbers::getLgCeiling(Digits, Scale);
-  }
-
-  bool operator==(const ScaledNumber &X) const { return compare(X) == 0; }
-  bool operator<(const ScaledNumber &X) const { return compare(X) < 0; }
-  bool operator!=(const ScaledNumber &X) const { return compare(X) != 0; }
-  bool operator>(const ScaledNumber &X) const { return compare(X) > 0; }
-  bool operator<=(const ScaledNumber &X) const { return compare(X) <= 0; }
-  bool operator>=(const ScaledNumber &X) const { return compare(X) >= 0; }
-
-  bool operator!() const { return isZero(); }
-
-  /// \brief Convert to a decimal representation in a string.
-  ///
-  /// Convert to a string.  Uses scientific notation for very large/small
-  /// numbers.  Scientific notation is used roughly for numbers outside of the
-  /// range 2^-64 through 2^64.
-  ///
-  /// \c Precision indicates the number of decimal digits of precision to use;
-  /// 0 requests the maximum available.
-  ///
-  /// As a special case to make debugging easier, if the number is small enough
-  /// to convert without scientific notation and has more than \c Precision
-  /// digits before the decimal place, it's printed accurately to the first
-  /// digit past zero.  E.g., assuming 10 digits of precision:
-  ///
-  ///     98765432198.7654... => 98765432198.8
-  ///      8765432198.7654... =>  8765432198.8
-  ///       765432198.7654... =>   765432198.8
-  ///        65432198.7654... =>    65432198.77
-  ///         5432198.7654... =>     5432198.765
-  std::string toString(unsigned Precision = DefaultPrecision) {
-    return ScaledNumberBase::toString(Digits, Scale, Width, Precision);
-  }
-
-  /// \brief Print a decimal representation.
-  ///
-  /// Print a string.  See toString for documentation.
-  raw_ostream &print(raw_ostream &OS,
-                     unsigned Precision = DefaultPrecision) const {
-    return ScaledNumberBase::print(OS, Digits, Scale, Width, Precision);
-  }
-  void dump() const { return ScaledNumberBase::dump(Digits, Scale, Width); }
-
-  ScaledNumber &operator+=(const ScaledNumber &X) {
-    std::tie(Digits, Scale) =
-        ScaledNumbers::getSum(Digits, Scale, X.Digits, X.Scale);
-    // Check for exponent past MaxScale.
-    if (Scale > MaxScale)
-      *this = getLargest();
-    return *this;
-  }
-  ScaledNumber &operator-=(const ScaledNumber &X) {
-    std::tie(Digits, Scale) =
-        ScaledNumbers::getDifference(Digits, Scale, X.Digits, X.Scale);
-    return *this;
-  }
-  ScaledNumber &operator*=(const ScaledNumber &X);
-  ScaledNumber &operator/=(const ScaledNumber &X);
-  ScaledNumber &operator<<=(int16_t Shift) {
-    shiftLeft(Shift);
-    return *this;
-  }
-  ScaledNumber &operator>>=(int16_t Shift) {
-    shiftRight(Shift);
-    return *this;
-  }
-
-private:
-  void shiftLeft(int32_t Shift);
-  void shiftRight(int32_t Shift);
-
-  /// \brief Adjust two floats to have matching exponents.
-  ///
-  /// Adjust \c this and \c X to have matching exponents.  Returns the new \c X
-  /// by value.  Does nothing if \a isZero() for either.
-  ///
-  /// The value that compares smaller will lose precision, and possibly become
-  /// \a isZero().
-  ScaledNumber matchScales(ScaledNumber X) {
-    ScaledNumbers::matchScales(Digits, Scale, X.Digits, X.Scale);
-    return X;
-  }
-
-public:
-  /// \brief Scale a large number accurately.
-  ///
-  /// Scale N (multiply it by this).  Uses full precision multiplication, even
-  /// if Width is smaller than 64, so information is not lost.
-  uint64_t scale(uint64_t N) const;
-  uint64_t scaleByInverse(uint64_t N) const {
-    // TODO: implement directly, rather than relying on inverse.  Inverse is
-    // expensive.
-    return inverse().scale(N);
-  }
-  int64_t scale(int64_t N) const {
-    std::pair<uint64_t, bool> Unsigned = splitSigned(N);
-    return joinSigned(scale(Unsigned.first), Unsigned.second);
-  }
-  int64_t scaleByInverse(int64_t N) const {
-    std::pair<uint64_t, bool> Unsigned = splitSigned(N);
-    return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second);
-  }
-
-  int compare(const ScaledNumber &X) const {
-    return ScaledNumbers::compare(Digits, Scale, X.Digits, X.Scale);
-  }
-  int compareTo(uint64_t N) const {
-    ScaledNumber Float = getFloat(N);
-    int Compare = compare(Float);
-    if (Width == 64 || Compare != 0)
-      return Compare;
-
-    // Check for precision loss.  We know *this == RoundTrip.
-    uint64_t RoundTrip = Float.template toInt<uint64_t>();
-    return N == RoundTrip ? 0 : RoundTrip < N ? -1 : 1;
-  }
-  int compareTo(int64_t N) const { return N < 0 ? 1 : compareTo(uint64_t(N)); }
-
-  ScaledNumber &invert() { return *this = ScaledNumber::getFloat(1) / *this; }
-  ScaledNumber inverse() const { return ScaledNumber(*this).invert(); }
-
-private:
-  static ScaledNumber getProduct(DigitsType LHS, DigitsType RHS) {
-    return ScaledNumbers::getProduct(LHS, RHS);
-  }
-  static ScaledNumber getQuotient(DigitsType Dividend, DigitsType Divisor) {
-    return ScaledNumbers::getQuotient(Dividend, Divisor);
-  }
-
-  static int countLeadingZerosWidth(DigitsType Digits) {
-    if (Width == 64)
-      return countLeadingZeros64(Digits);
-    if (Width == 32)
-      return countLeadingZeros32(Digits);
-    return countLeadingZeros32(Digits) + Width - 32;
-  }
-
-  /// \brief Adjust a number to width, rounding up if necessary.
-  ///
-  /// Should only be called for \c Shift close to zero.
-  ///
-  /// \pre Shift >= MinScale && Shift + 64 <= MaxScale.
-  static ScaledNumber adjustToWidth(uint64_t N, int32_t Shift) {
-    assert(Shift >= MinScale && "Shift should be close to 0");
-    assert(Shift <= MaxScale - 64 && "Shift should be close to 0");
-    auto Adjusted = ScaledNumbers::getAdjusted<DigitsT>(N, Shift);
-    return Adjusted;
-  }
-
-  static ScaledNumber getRounded(ScaledNumber P, bool Round) {
-    // Saturate.
-    if (P.isLargest())
-      return P;
-
-    return ScaledNumbers::getRounded(P.Digits, P.Scale, Round);
-  }
-};
-
-#define SCALED_NUMBER_BOP(op, base)                                            \
-  template <class DigitsT>                                                     \
-  ScaledNumber<DigitsT> operator op(const ScaledNumber<DigitsT> &L,            \
-                                    const ScaledNumber<DigitsT> &R) {          \
-    return ScaledNumber<DigitsT>(L) base R;                                    \
-  }
-SCALED_NUMBER_BOP(+, += )
-SCALED_NUMBER_BOP(-, -= )
-SCALED_NUMBER_BOP(*, *= )
-SCALED_NUMBER_BOP(/, /= )
-SCALED_NUMBER_BOP(<<, <<= )
-SCALED_NUMBER_BOP(>>, >>= )
-#undef SCALED_NUMBER_BOP
-
-template <class DigitsT>
-raw_ostream &operator<<(raw_ostream &OS, const ScaledNumber<DigitsT> &X) {
-  return X.print(OS, 10);
-}
-
-#define SCALED_NUMBER_COMPARE_TO_TYPE(op, T1, T2)                              \
-  template <class DigitsT>                                                     \
-  bool operator op(const ScaledNumber<DigitsT> &L, T1 R) {                     \
-    return L.compareTo(T2(R)) op 0;                                            \
-  }                                                                            \
-  template <class DigitsT>                                                     \
-  bool operator op(T1 L, const ScaledNumber<DigitsT> &R) {                     \
-    return 0 op R.compareTo(T2(L));                                            \
-  }
-#define SCALED_NUMBER_COMPARE_TO(op)                                           \
-  SCALED_NUMBER_COMPARE_TO_TYPE(op, uint64_t, uint64_t)                        \
-  SCALED_NUMBER_COMPARE_TO_TYPE(op, uint32_t, uint64_t)                        \
-  SCALED_NUMBER_COMPARE_TO_TYPE(op, int64_t, int64_t)                          \
-  SCALED_NUMBER_COMPARE_TO_TYPE(op, int32_t, int64_t)
-SCALED_NUMBER_COMPARE_TO(< )
-SCALED_NUMBER_COMPARE_TO(> )
-SCALED_NUMBER_COMPARE_TO(== )
-SCALED_NUMBER_COMPARE_TO(!= )
-SCALED_NUMBER_COMPARE_TO(<= )
-SCALED_NUMBER_COMPARE_TO(>= )
-#undef SCALED_NUMBER_COMPARE_TO
-#undef SCALED_NUMBER_COMPARE_TO_TYPE
-
-template <class DigitsT>
-uint64_t ScaledNumber<DigitsT>::scale(uint64_t N) const {
-  if (Width == 64 || N <= DigitsLimits::max())
-    return (getFloat(N) * *this).template toInt<uint64_t>();
-
-  // Defer to the 64-bit version.
-  return ScaledNumber<uint64_t>(Digits, Scale).scale(N);
-}
-
-template <class DigitsT>
-template <class IntT>
-IntT ScaledNumber<DigitsT>::toInt() const {
-  typedef std::numeric_limits<IntT> Limits;
-  if (*this < 1)
-    return 0;
-  if (*this >= Limits::max())
-    return Limits::max();
-
-  IntT N = Digits;
-  if (Scale > 0) {
-    assert(size_t(Scale) < sizeof(IntT) * 8);
-    return N << Scale;
-  }
-  if (Scale < 0) {
-    assert(size_t(-Scale) < sizeof(IntT) * 8);
-    return N >> -Scale;
-  }
-  return N;
-}
-
-template <class DigitsT>
-ScaledNumber<DigitsT> &ScaledNumber<DigitsT>::
-operator*=(const ScaledNumber &X) {
-  if (isZero())
-    return *this;
-  if (X.isZero())
-    return *this = X;
-
-  // Save the exponents.
-  int32_t Scales = int32_t(Scale) + int32_t(X.Scale);
-
-  // Get the raw product.
-  *this = getProduct(Digits, X.Digits);
-
-  // Combine with exponents.
-  return *this <<= Scales;
-}
-template <class DigitsT>
-ScaledNumber<DigitsT> &ScaledNumber<DigitsT>::
-operator/=(const ScaledNumber &X) {
-  if (isZero())
-    return *this;
-  if (X.isZero())
-    return *this = getLargest();
-
-  // Save the exponents.
-  int32_t Scales = int32_t(Scale) - int32_t(X.Scale);
-
-  // Get the raw quotient.
-  *this = getQuotient(Digits, X.Digits);
-
-  // Combine with exponents.
-  return *this <<= Scales;
-}
-template <class DigitsT> void ScaledNumber<DigitsT>::shiftLeft(int32_t Shift) {
-  if (!Shift || isZero())
-    return;
-  assert(Shift != INT32_MIN);
-  if (Shift < 0) {
-    shiftRight(-Shift);
-    return;
-  }
-
-  // Shift as much as we can in the exponent.
-  int32_t ScaleShift = std::min(Shift, MaxScale - Scale);
-  Scale += ScaleShift;
-  if (ScaleShift == Shift)
-    return;
-
-  // Check this late, since it's rare.
-  if (isLargest())
-    return;
-
-  // Shift the digits themselves.
-  Shift -= ScaleShift;
-  if (Shift > countLeadingZerosWidth(Digits)) {
-    // Saturate.
-    *this = getLargest();
-    return;
-  }
-
-  Digits <<= Shift;
-  return;
-}
-
-template <class DigitsT> void ScaledNumber<DigitsT>::shiftRight(int32_t Shift) {
-  if (!Shift || isZero())
-    return;
-  assert(Shift != INT32_MIN);
-  if (Shift < 0) {
-    shiftLeft(-Shift);
-    return;
-  }
-
-  // Shift as much as we can in the exponent.
-  int32_t ScaleShift = std::min(Shift, Scale - MinScale);
-  Scale -= ScaleShift;
-  if (ScaleShift == Shift)
-    return;
-
-  // Shift the digits themselves.
-  Shift -= ScaleShift;
-  if (Shift >= Width) {
-    // Saturate.
-    *this = getZero();
-    return;
-  }
+class BasicBlock;
+class BranchProbabilityInfo;
+class Function;
+class Loop;
+class LoopInfo;
+class MachineBasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoop;
+class MachineLoopInfo;
 
-  Digits >>= Shift;
-  return;
-}
+namespace bfi_detail {
 
-template <class T> struct isPodLike<ScaledNumber<T>> {
-  static const bool value = true;
-};
-}
+struct IrreducibleGraph;
 
-//===----------------------------------------------------------------------===//
-//
-// BlockMass definition.
-//
-// TODO: Make this private to BlockFrequencyInfoImpl or delete.
-//
-//===----------------------------------------------------------------------===//
-namespace llvm {
+// This is part of a workaround for a GCC 4.7 crash on lambdas.
+template <class BT> struct BlockEdgesAdder;
 
 /// \brief Mass of a block.
 ///
@@ -585,11 +112,11 @@ public:
   bool operator<(const BlockMass &X) const { return Mass < X.Mass; }
   bool operator>(const BlockMass &X) const { return Mass > X.Mass; }
 
-  /// \brief Convert to floating point.
+  /// \brief Convert to scaled number.
   ///
-  /// Convert to a float.  \a isFull() gives 1.0, while \a isEmpty() gives
-  /// slightly above 0.0.
-  ScaledNumber<uint64_t> toFloat() const;
+  /// Convert to \a ScaledNumber.  \a isFull() gives 1.0, while \a isEmpty()
+  /// gives slightly above 0.0.
+  ScaledNumber<uint64_t> toScaled() const;
 
   void dump() const;
   raw_ostream &print(raw_ostream &OS) const;
@@ -612,35 +139,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) {
   return X.print(OS);
 }
 
-template <> struct isPodLike<BlockMass> {
+} // end namespace bfi_detail
+
+template <> struct isPodLike<bfi_detail::BlockMass> {
   static const bool value = true;
 };
-}
-
-//===----------------------------------------------------------------------===//
-//
-// BlockFrequencyInfoImpl definition.
-//
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-class BasicBlock;
-class BranchProbabilityInfo;
-class Function;
-class Loop;
-class LoopInfo;
-class MachineBasicBlock;
-class MachineBranchProbabilityInfo;
-class MachineFunction;
-class MachineLoop;
-class MachineLoopInfo;
-
-namespace bfi_detail {
-struct IrreducibleGraph;
-
-// This is part of a workaround for a GCC 4.7 crash on lambdas.
-template <class BT> struct BlockEdgesAdder;
-}
 
 /// \brief Base class for BlockFrequencyInfoImpl
 ///
@@ -652,7 +155,8 @@ template <class BT> struct BlockEdgesAdder;
 /// BlockFrequencyInfoImpl.  See there for details.
 class BlockFrequencyInfoImplBase {
 public:
-  typedef ScaledNumber<uint64_t> Float;
+  typedef ScaledNumber<uint64_t> Scaled64;
+  typedef bfi_detail::BlockMass BlockMass;
 
   /// \brief Representative of a block.
   ///
@@ -681,34 +185,37 @@ public:
 
   /// \brief Stats about a block itself.
   struct FrequencyData {
-    Float Floating;
+    Scaled64 Scaled;
     uint64_t Integer;
   };
 
   /// \brief Data about a loop.
   ///
-  /// Contains the data necessary to represent represent a loop as a
-  /// pseudo-node once it's packaged.
+  /// Contains the data necessary to represent a loop as a pseudo-node once it's
+  /// packaged.
   struct LoopData {
     typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
     typedef SmallVector<BlockNode, 4> NodeList;
-    LoopData *Parent;       ///< The parent loop.
-    bool IsPackaged;        ///< Whether this has been packaged.
-    uint32_t NumHeaders;    ///< Number of headers.
-    ExitMap Exits;          ///< Successor edges (and weights).
-    NodeList Nodes;         ///< Header and the members of the loop.
-    BlockMass BackedgeMass; ///< Mass returned to loop header.
+    typedef SmallVector<BlockMass, 1> HeaderMassList;
+    LoopData *Parent;            ///< The parent loop.
+    bool IsPackaged;             ///< Whether this has been packaged.
+    uint32_t NumHeaders;         ///< Number of headers.
+    ExitMap Exits;               ///< Successor edges (and weights).
+    NodeList Nodes;              ///< Header and the members of the loop.
+    HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
     BlockMass Mass;
-    Float Scale;
+    Scaled64 Scale;
 
     LoopData(LoopData *Parent, const BlockNode &Header)
-        : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {}
+        : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header),
+          BackedgeMass(1) {}
     template <class It1, class It2>
     LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
              It2 LastOther)
         : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
       NumHeaders = Nodes.size();
       Nodes.insert(Nodes.end(), FirstOther, LastOther);
+      BackedgeMass.resize(NumHeaders);
     }
     bool isHeader(const BlockNode &Node) const {
       if (isIrreducible())
@@ -719,6 +226,14 @@ public:
     BlockNode getHeader() const { return Nodes[0]; }
     bool isIrreducible() const { return NumHeaders > 1; }
 
+    HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
+      assert(isHeader(B) && "this is only valid on loop header blocks");
+      if (isIrreducible())
+        return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
+               Nodes.begin();
+      return 0;
+    }
+
     NodeList::const_iterator members_begin() const {
       return Nodes.begin() + NumHeaders;
     }
@@ -756,7 +271,7 @@ public:
     /// loop.
     ///
     /// This function should only be called when distributing mass.  As long as
-    /// there are no irreducilbe edges to Node, then it will have complexity
+    /// there are no irreducible edges to Node, then it will have complexity
     /// O(1) in this context.
     ///
     /// In general, the complexity is O(L), where L is the number of loop
@@ -818,6 +333,8 @@ public:
     BlockNode TargetNode;
     uint64_t Amount;
     Weight() : Type(Local), Amount(0) {}
+    Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
+        : Type(Type), TargetNode(TargetNode), Amount(Amount) {}
   };
 
   /// \brief Distribution of unscaled probability weight.
@@ -925,6 +442,16 @@ public:
   /// \brief Compute the loop scale for a loop.
   void computeLoopScale(LoopData &Loop);
 
+  /// Adjust the mass of all headers in an irreducible loop.
+  ///
+  /// Initially, irreducible loops are assumed to distribute their mass
+  /// equally among its headers. This can lead to wrong frequency estimates
+  /// since some headers may be executed more frequently than others.
+  ///
+  /// This adjusts header mass distribution so it matches the weights of
+  /// the backedges going into each of the loop headers.
+  void adjustLoopHeaderMass(LoopData &Loop);
+
   /// \brief Package up a loop.
   void packageLoop(LoopData &Loop);
 
@@ -946,7 +473,7 @@ public:
   virtual raw_ostream &print(raw_ostream &OS) const { return OS; }
   void dump() const { print(dbgs()); }
 
-  Float getFloatingBlockFreq(const BlockNode &Node) const;
+  Scaled64 getFloatingBlockFreq(const BlockNode &Node) const;
 
   BlockFrequency getBlockFreq(const BlockNode &Node) const;
 
@@ -1189,6 +716,17 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
 ///         - Distribute the mass accordingly, dithering to minimize mass loss,
 ///           as described in \a distributeMass().
 ///
+///     In the case of irreducible loops, instead of a single loop header,
+///     there will be several. The computation of backedge masses is similar
+///     but instead of having a single backedge mass, there will be one
+///     backedge per loop header. In these cases, each backedge will carry
+///     a mass proportional to the edge weights along the corresponding
+///     path.
+///
+///     At the end of propagation, the full mass assigned to the loop will be
+///     distributed among the loop headers proportionally according to the
+///     mass flowing through their backedges.
+///
 ///     Finally, calculate the loop scale from the accumulated backedge mass.
 ///
 ///  3. Distribute mass in the function (\a computeMassInFunction()).
@@ -1212,9 +750,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
 ///
 /// It has some known flaws.
 ///
-///   - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting
-///     BlockFrequency's 64-bit integer precision.
-///
 ///   - The model of irreducible control flow is a rough approximation.
 ///
 ///     Modelling irreducible control flow exactly involves setting up and
@@ -1232,11 +767,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
 ///         as sub-loops, rather than arbitrarily shoving the problematic
 ///         blocks into the headers of the main irreducible SCC.
 ///
-///       - Backedge frequencies are assumed to be evenly split between the
-///         headers of a given irreducible SCC.  Instead, we could track the
-///         backedge mass separately for each header, and adjust their relative
-///         frequencies.
-///
 ///       - Entry frequencies are assumed to be evenly split between the
 ///         headers of a given irreducible SCC, which is the only option if we
 ///         need to compute mass in the SCC before its parent loop.  Instead,
@@ -1343,7 +873,7 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
   ///
   /// \pre \a computeMassInLoop() has been called for each subloop of \c
   /// OuterLoop.
-  /// \pre \c Insert points at the the last loop successfully processed by \a
+  /// \pre \c Insert points at the last loop successfully processed by \a
   /// computeMassInLoop().
   /// \pre \c OuterLoop has irreducible SCCs.
   void computeIrreducibleMass(LoopData *OuterLoop,
@@ -1375,15 +905,15 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
 public:
   const FunctionT *getFunction() const { return F; }
 
-  void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
-                  const LoopInfoT *LI);
+  void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
+                 const LoopInfoT &LI);
   BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
 
   using BlockFrequencyInfoImplBase::getEntryFreq;
   BlockFrequency getBlockFreq(const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
   }
-  Float getFloatingBlockFreq(const BlockT *BB) const {
+  Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
   }
 
@@ -1408,13 +938,13 @@ public:
 };
 
 template <class BT>
-void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
-                                            const BranchProbabilityInfoT *BPI,
-                                            const LoopInfoT *LI) {
+void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
+                                           const BranchProbabilityInfoT &BPI,
+                                           const LoopInfoT &LI) {
   // Save the parameters.
-  this->BPI = BPI;
-  this->LI = LI;
-  this->F = F;
+  this->BPI = &BPI;
+  this->LI = &LI;
+  this->F = &F;
 
   // Clean up left-over data structures.
   BlockFrequencyInfoImplBase::clear();
@@ -1422,12 +952,12 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
   Nodes.clear();
 
   // Initialize.
-  DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n================="
-               << std::string(F->getName().size(), '=') << "\n");
+  DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n================="
+               << std::string(F.getName().size(), '=') << "\n");
   initializeRPOT();
   initializeLoops();
 
-  // Visit loops in post-order to find thelocal mass distribution, and then do
+  // Visit loops in post-order to find the local mass distribution, and then do
   // the full function.
   computeMassInLoops();
   computeMassInFunction();
@@ -1539,6 +1069,8 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
     for (const BlockNode &M : Loop.Nodes)
       if (!propagateMassToSuccessors(&Loop, M))
         llvm_unreachable("unhandled irreducible control flow");
+
+    adjustLoopHeaderMass(Loop);
   } else {
     Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
     if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
@@ -1667,7 +1199,8 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const {
   OS << "\n";
   return OS;
 }
-}
+
+} // end namespace llvm
 
 #undef DEBUG_TYPE