X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=include%2Fllvm%2FAnalysis%2FBlockFrequencyInfoImpl.h;h=6c101a63e128c77ad5e1af8e51e6c7cb28c10183;hb=00552e3875ee5f382db6c98286a241a7d0efe1b8;hp=618b6e33663029d45af12e533cda0186caf5efa6;hpb=747b62f119668e2c4d8622242fbc4b581c34084e;p=oota-llvm.git diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 618b6e33663..6c101a63e12 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -31,498 +31,25 @@ #define DEBUG_TYPE "block-freq" -//===----------------------------------------------------------------------===// -// -// ScaledNumber definition. -// -// TODO: Move to include/llvm/Support/ScaledNumber.h -// -//===----------------------------------------------------------------------===// namespace llvm { -class ScaledNumberBase { -public: - static const int32_t MaxScale = 16383; - static const int32_t MinScale = -16382; - static const int DefaultPrecision = 10; - - static void dump(uint64_t D, int16_t E, int Width); - static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width, - unsigned Precision); - static std::string toString(uint64_t D, int16_t E, int Width, - unsigned Precision); - static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); } - static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); } - static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); } - - static std::pair splitSigned(int64_t N) { - if (N >= 0) - return std::make_pair(N, false); - uint64_t Unsigned = N == INT64_MIN ? UINT64_C(1) << 63 : uint64_t(-N); - return std::make_pair(Unsigned, true); - } - static int64_t joinSigned(uint64_t U, bool IsNeg) { - if (U > uint64_t(INT64_MAX)) - return IsNeg ? INT64_MIN : INT64_MAX; - return IsNeg ? -int64_t(U) : int64_t(U); - } -}; - -/// \brief Simple representation of a scaled number. -/// -/// ScaledNumber is a number represented by digits and a scale. It uses simple -/// saturation arithmetic and every operation is well-defined for every value. -/// It's somewhat similar in behaviour to a soft-float, but is *not* a -/// replacement for one. If you're doing numerics, look at \a APFloat instead. -/// Nevertheless, we've found these semantics useful for modelling certain cost -/// metrics. -/// -/// The number is split into a signed scale and unsigned digits. The number -/// represented is \c getDigits()*2^getScale(). In this way, the digits are -/// much like the mantissa in the x87 long double, but there is no canonical -/// form so the same number can be represented by many bit representations. -/// -/// ScaledNumber is templated on the underlying integer type for digits, which -/// is expected to be unsigned. -/// -/// Unlike APFloat, ScaledNumber does not model architecture floating point -/// behaviour -- while this might make it a little faster and easier to reason -/// about, it certainly makes it more dangerous for general numerics. -/// -/// ScaledNumber is totally ordered. However, there is no canonical form, so -/// there are multiple representations of most scalars. E.g.: -/// -/// ScaledNumber(8u, 0) == ScaledNumber(4u, 1) -/// ScaledNumber(4u, 1) == ScaledNumber(2u, 2) -/// ScaledNumber(2u, 2) == ScaledNumber(1u, 3) -/// -/// ScaledNumber implements most arithmetic operations. Precision is kept -/// where possible. Uses simple saturation arithmetic, so that operations -/// saturate to 0.0 or getLargest() rather than under or overflowing. It has -/// some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0. -/// Any other division by 0.0 is defined to be getLargest(). -/// -/// As a convenience for modifying the exponent, left and right shifting are -/// both implemented, and both interpret negative shifts as positive shifts in -/// the opposite direction. -/// -/// Scales are limited to the range accepted by x87 long double. This makes -/// it trivial to add functionality to convert to APFloat (this is already -/// relied on for the implementation of printing). -/// -/// Possible (and conflicting) future directions: -/// -/// 1. Turn this into a wrapper around \a APFloat. -/// 2. Share the algorithm implementations with \a APFloat. -/// 3. Allow \a ScaledNumber to represent a signed number. -template class ScaledNumber : ScaledNumberBase { -public: - static_assert(!std::numeric_limits::is_signed, - "only unsigned floats supported"); - - typedef DigitsT DigitsType; - -private: - typedef std::numeric_limits DigitsLimits; - - static const int Width = sizeof(DigitsType) * 8; - static_assert(Width <= 64, "invalid integer width for digits"); - -private: - DigitsType Digits; - int16_t Scale; - -public: - ScaledNumber() : Digits(0), Scale(0) {} - - ScaledNumber(DigitsType Digits, int16_t Scale) - : Digits(Digits), Scale(Scale) {} - -private: - ScaledNumber(const std::pair &X) - : Digits(X.first), Scale(X.second) {} - -public: - static ScaledNumber getZero() { return ScaledNumber(0, 0); } - static ScaledNumber getOne() { return ScaledNumber(1, 0); } - static ScaledNumber getLargest() { - return ScaledNumber(DigitsLimits::max(), MaxScale); - } - static ScaledNumber getFloat(uint64_t N) { return adjustToWidth(N, 0); } - static ScaledNumber getInverseFloat(uint64_t N) { - return getFloat(N).invert(); - } - static ScaledNumber getFraction(DigitsType N, DigitsType D) { - return getQuotient(N, D); - } - - int16_t getScale() const { return Scale; } - DigitsType getDigits() const { return Digits; } - - /// \brief Convert to the given integer type. - /// - /// Convert to \c IntT using simple saturating arithmetic, truncating if - /// necessary. - template IntT toInt() const; - - bool isZero() const { return !Digits; } - bool isLargest() const { return *this == getLargest(); } - bool isOne() const { - if (Scale > 0 || Scale <= -Width) - return false; - return Digits == DigitsType(1) << -Scale; - } - - /// \brief The log base 2, rounded. - /// - /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN. - int32_t lg() const { return ScaledNumbers::getLg(Digits, Scale); } - - /// \brief The log base 2, rounded towards INT32_MIN. - /// - /// Get the lg floor. lg 0 is defined to be INT32_MIN. - int32_t lgFloor() const { return ScaledNumbers::getLgFloor(Digits, Scale); } - - /// \brief The log base 2, rounded towards INT32_MAX. - /// - /// Get the lg ceiling. lg 0 is defined to be INT32_MIN. - int32_t lgCeiling() const { - return ScaledNumbers::getLgCeiling(Digits, Scale); - } - - bool operator==(const ScaledNumber &X) const { return compare(X) == 0; } - bool operator<(const ScaledNumber &X) const { return compare(X) < 0; } - bool operator!=(const ScaledNumber &X) const { return compare(X) != 0; } - bool operator>(const ScaledNumber &X) const { return compare(X) > 0; } - bool operator<=(const ScaledNumber &X) const { return compare(X) <= 0; } - bool operator>=(const ScaledNumber &X) const { return compare(X) >= 0; } - - bool operator!() const { return isZero(); } - - /// \brief Convert to a decimal representation in a string. - /// - /// Convert to a string. Uses scientific notation for very large/small - /// numbers. Scientific notation is used roughly for numbers outside of the - /// range 2^-64 through 2^64. - /// - /// \c Precision indicates the number of decimal digits of precision to use; - /// 0 requests the maximum available. - /// - /// As a special case to make debugging easier, if the number is small enough - /// to convert without scientific notation and has more than \c Precision - /// digits before the decimal place, it's printed accurately to the first - /// digit past zero. E.g., assuming 10 digits of precision: - /// - /// 98765432198.7654... => 98765432198.8 - /// 8765432198.7654... => 8765432198.8 - /// 765432198.7654... => 765432198.8 - /// 65432198.7654... => 65432198.77 - /// 5432198.7654... => 5432198.765 - std::string toString(unsigned Precision = DefaultPrecision) { - return ScaledNumberBase::toString(Digits, Scale, Width, Precision); - } - - /// \brief Print a decimal representation. - /// - /// Print a string. See toString for documentation. - raw_ostream &print(raw_ostream &OS, - unsigned Precision = DefaultPrecision) const { - return ScaledNumberBase::print(OS, Digits, Scale, Width, Precision); - } - void dump() const { return ScaledNumberBase::dump(Digits, Scale, Width); } - - ScaledNumber &operator+=(const ScaledNumber &X) { - std::tie(Digits, Scale) = - ScaledNumbers::getSum(Digits, Scale, X.Digits, X.Scale); - // Check for exponent past MaxScale. - if (Scale > MaxScale) - *this = getLargest(); - return *this; - } - ScaledNumber &operator-=(const ScaledNumber &X) { - std::tie(Digits, Scale) = - ScaledNumbers::getDifference(Digits, Scale, X.Digits, X.Scale); - return *this; - } - ScaledNumber &operator*=(const ScaledNumber &X); - ScaledNumber &operator/=(const ScaledNumber &X); - ScaledNumber &operator<<=(int16_t Shift) { - shiftLeft(Shift); - return *this; - } - ScaledNumber &operator>>=(int16_t Shift) { - shiftRight(Shift); - return *this; - } - -private: - void shiftLeft(int32_t Shift); - void shiftRight(int32_t Shift); - - /// \brief Adjust two floats to have matching exponents. - /// - /// Adjust \c this and \c X to have matching exponents. Returns the new \c X - /// by value. Does nothing if \a isZero() for either. - /// - /// The value that compares smaller will lose precision, and possibly become - /// \a isZero(). - ScaledNumber matchScales(ScaledNumber X) { - ScaledNumbers::matchScales(Digits, Scale, X.Digits, X.Scale); - return X; - } - -public: - /// \brief Scale a large number accurately. - /// - /// Scale N (multiply it by this). Uses full precision multiplication, even - /// if Width is smaller than 64, so information is not lost. - uint64_t scale(uint64_t N) const; - uint64_t scaleByInverse(uint64_t N) const { - // TODO: implement directly, rather than relying on inverse. Inverse is - // expensive. - return inverse().scale(N); - } - int64_t scale(int64_t N) const { - std::pair Unsigned = splitSigned(N); - return joinSigned(scale(Unsigned.first), Unsigned.second); - } - int64_t scaleByInverse(int64_t N) const { - std::pair Unsigned = splitSigned(N); - return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second); - } - - int compare(const ScaledNumber &X) const { - return ScaledNumbers::compare(Digits, Scale, X.Digits, X.Scale); - } - int compareTo(uint64_t N) const { - ScaledNumber Float = getFloat(N); - int Compare = compare(Float); - if (Width == 64 || Compare != 0) - return Compare; - - // Check for precision loss. We know *this == RoundTrip. - uint64_t RoundTrip = Float.template toInt(); - return N == RoundTrip ? 0 : RoundTrip < N ? -1 : 1; - } - int compareTo(int64_t N) const { return N < 0 ? 1 : compareTo(uint64_t(N)); } - - ScaledNumber &invert() { return *this = ScaledNumber::getFloat(1) / *this; } - ScaledNumber inverse() const { return ScaledNumber(*this).invert(); } - -private: - static ScaledNumber getProduct(DigitsType LHS, DigitsType RHS) { - return ScaledNumbers::getProduct(LHS, RHS); - } - static ScaledNumber getQuotient(DigitsType Dividend, DigitsType Divisor) { - return ScaledNumbers::getQuotient(Dividend, Divisor); - } - - static int countLeadingZerosWidth(DigitsType Digits) { - if (Width == 64) - return countLeadingZeros64(Digits); - if (Width == 32) - return countLeadingZeros32(Digits); - return countLeadingZeros32(Digits) + Width - 32; - } - - /// \brief Adjust a number to width, rounding up if necessary. - /// - /// Should only be called for \c Shift close to zero. - /// - /// \pre Shift >= MinScale && Shift + 64 <= MaxScale. - static ScaledNumber adjustToWidth(uint64_t N, int32_t Shift) { - assert(Shift >= MinScale && "Shift should be close to 0"); - assert(Shift <= MaxScale - 64 && "Shift should be close to 0"); - auto Adjusted = ScaledNumbers::getAdjusted(N, Shift); - return Adjusted; - } - - static ScaledNumber getRounded(ScaledNumber P, bool Round) { - // Saturate. - if (P.isLargest()) - return P; - - return ScaledNumbers::getRounded(P.Digits, P.Scale, Round); - } -}; - -#define SCALED_NUMBER_BOP(op, base) \ - template \ - ScaledNumber operator op(const ScaledNumber &L, \ - const ScaledNumber &R) { \ - return ScaledNumber(L) base R; \ - } -SCALED_NUMBER_BOP(+, += ) -SCALED_NUMBER_BOP(-, -= ) -SCALED_NUMBER_BOP(*, *= ) -SCALED_NUMBER_BOP(/, /= ) -SCALED_NUMBER_BOP(<<, <<= ) -SCALED_NUMBER_BOP(>>, >>= ) -#undef SCALED_NUMBER_BOP - -template -raw_ostream &operator<<(raw_ostream &OS, const ScaledNumber &X) { - return X.print(OS, 10); -} - -#define SCALED_NUMBER_COMPARE_TO_TYPE(op, T1, T2) \ - template \ - bool operator op(const ScaledNumber &L, T1 R) { \ - return L.compareTo(T2(R)) op 0; \ - } \ - template \ - bool operator op(T1 L, const ScaledNumber &R) { \ - return 0 op R.compareTo(T2(L)); \ - } -#define SCALED_NUMBER_COMPARE_TO(op) \ - SCALED_NUMBER_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \ - SCALED_NUMBER_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \ - SCALED_NUMBER_COMPARE_TO_TYPE(op, int64_t, int64_t) \ - SCALED_NUMBER_COMPARE_TO_TYPE(op, int32_t, int64_t) -SCALED_NUMBER_COMPARE_TO(< ) -SCALED_NUMBER_COMPARE_TO(> ) -SCALED_NUMBER_COMPARE_TO(== ) -SCALED_NUMBER_COMPARE_TO(!= ) -SCALED_NUMBER_COMPARE_TO(<= ) -SCALED_NUMBER_COMPARE_TO(>= ) -#undef SCALED_NUMBER_COMPARE_TO -#undef SCALED_NUMBER_COMPARE_TO_TYPE - -template -uint64_t ScaledNumber::scale(uint64_t N) const { - if (Width == 64 || N <= DigitsLimits::max()) - return (getFloat(N) * *this).template toInt(); - - // Defer to the 64-bit version. - return ScaledNumber(Digits, Scale).scale(N); -} - -template -template -IntT ScaledNumber::toInt() const { - typedef std::numeric_limits Limits; - if (*this < 1) - return 0; - if (*this >= Limits::max()) - return Limits::max(); - - IntT N = Digits; - if (Scale > 0) { - assert(size_t(Scale) < sizeof(IntT) * 8); - return N << Scale; - } - if (Scale < 0) { - assert(size_t(-Scale) < sizeof(IntT) * 8); - return N >> -Scale; - } - return N; -} - -template -ScaledNumber &ScaledNumber:: -operator*=(const ScaledNumber &X) { - if (isZero()) - return *this; - if (X.isZero()) - return *this = X; - - // Save the exponents. - int32_t Scales = int32_t(Scale) + int32_t(X.Scale); - - // Get the raw product. - *this = getProduct(Digits, X.Digits); - - // Combine with exponents. - return *this <<= Scales; -} -template -ScaledNumber &ScaledNumber:: -operator/=(const ScaledNumber &X) { - if (isZero()) - return *this; - if (X.isZero()) - return *this = getLargest(); - - // Save the exponents. - int32_t Scales = int32_t(Scale) - int32_t(X.Scale); - - // Get the raw quotient. - *this = getQuotient(Digits, X.Digits); - - // Combine with exponents. - return *this <<= Scales; -} -template void ScaledNumber::shiftLeft(int32_t Shift) { - if (!Shift || isZero()) - return; - assert(Shift != INT32_MIN); - if (Shift < 0) { - shiftRight(-Shift); - return; - } - - // Shift as much as we can in the exponent. - int32_t ScaleShift = std::min(Shift, MaxScale - Scale); - Scale += ScaleShift; - if (ScaleShift == Shift) - return; - - // Check this late, since it's rare. - if (isLargest()) - return; - - // Shift the digits themselves. - Shift -= ScaleShift; - if (Shift > countLeadingZerosWidth(Digits)) { - // Saturate. - *this = getLargest(); - return; - } - - Digits <<= Shift; - return; -} - -template void ScaledNumber::shiftRight(int32_t Shift) { - if (!Shift || isZero()) - return; - assert(Shift != INT32_MIN); - if (Shift < 0) { - shiftLeft(-Shift); - return; - } - - // Shift as much as we can in the exponent. - int32_t ScaleShift = std::min(Shift, Scale - MinScale); - Scale -= ScaleShift; - if (ScaleShift == Shift) - return; - - // Shift the digits themselves. - Shift -= ScaleShift; - if (Shift >= Width) { - // Saturate. - *this = getZero(); - return; - } +class BasicBlock; +class BranchProbabilityInfo; +class Function; +class Loop; +class LoopInfo; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineLoop; +class MachineLoopInfo; - Digits >>= Shift; - return; -} +namespace bfi_detail { -template struct isPodLike> { - static const bool value = true; -}; -} +struct IrreducibleGraph; -//===----------------------------------------------------------------------===// -// -// BlockMass definition. -// -// TODO: Make this private to BlockFrequencyInfoImpl or delete. -// -//===----------------------------------------------------------------------===// -namespace llvm { +// This is part of a workaround for a GCC 4.7 crash on lambdas. +template struct BlockEdgesAdder; /// \brief Mass of a block. /// @@ -585,11 +112,11 @@ public: bool operator<(const BlockMass &X) const { return Mass < X.Mass; } bool operator>(const BlockMass &X) const { return Mass > X.Mass; } - /// \brief Convert to floating point. + /// \brief Convert to scaled number. /// - /// Convert to a float. \a isFull() gives 1.0, while \a isEmpty() gives - /// slightly above 0.0. - ScaledNumber toFloat() const; + /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty() + /// gives slightly above 0.0. + ScaledNumber toScaled() const; void dump() const; raw_ostream &print(raw_ostream &OS) const; @@ -612,35 +139,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) { return X.print(OS); } -template <> struct isPodLike { +} // end namespace bfi_detail + +template <> struct isPodLike { static const bool value = true; }; -} - -//===----------------------------------------------------------------------===// -// -// BlockFrequencyInfoImpl definition. -// -//===----------------------------------------------------------------------===// -namespace llvm { - -class BasicBlock; -class BranchProbabilityInfo; -class Function; -class Loop; -class LoopInfo; -class MachineBasicBlock; -class MachineBranchProbabilityInfo; -class MachineFunction; -class MachineLoop; -class MachineLoopInfo; - -namespace bfi_detail { -struct IrreducibleGraph; - -// This is part of a workaround for a GCC 4.7 crash on lambdas. -template struct BlockEdgesAdder; -} /// \brief Base class for BlockFrequencyInfoImpl /// @@ -652,7 +155,8 @@ template struct BlockEdgesAdder; /// BlockFrequencyInfoImpl. See there for details. class BlockFrequencyInfoImplBase { public: - typedef ScaledNumber Float; + typedef ScaledNumber Scaled64; + typedef bfi_detail::BlockMass BlockMass; /// \brief Representative of a block. /// @@ -681,34 +185,37 @@ public: /// \brief Stats about a block itself. struct FrequencyData { - Float Floating; + Scaled64 Scaled; uint64_t Integer; }; /// \brief Data about a loop. /// - /// Contains the data necessary to represent represent a loop as a - /// pseudo-node once it's packaged. + /// Contains the data necessary to represent a loop as a pseudo-node once it's + /// packaged. struct LoopData { typedef SmallVector, 4> ExitMap; typedef SmallVector NodeList; - LoopData *Parent; ///< The parent loop. - bool IsPackaged; ///< Whether this has been packaged. - uint32_t NumHeaders; ///< Number of headers. - ExitMap Exits; ///< Successor edges (and weights). - NodeList Nodes; ///< Header and the members of the loop. - BlockMass BackedgeMass; ///< Mass returned to loop header. + typedef SmallVector HeaderMassList; + LoopData *Parent; ///< The parent loop. + bool IsPackaged; ///< Whether this has been packaged. + uint32_t NumHeaders; ///< Number of headers. + ExitMap Exits; ///< Successor edges (and weights). + NodeList Nodes; ///< Header and the members of the loop. + HeaderMassList BackedgeMass; ///< Mass returned to each loop header. BlockMass Mass; - Float Scale; + Scaled64 Scale; LoopData(LoopData *Parent, const BlockNode &Header) - : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {} + : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header), + BackedgeMass(1) {} template LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, It2 LastOther) : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) { NumHeaders = Nodes.size(); Nodes.insert(Nodes.end(), FirstOther, LastOther); + BackedgeMass.resize(NumHeaders); } bool isHeader(const BlockNode &Node) const { if (isIrreducible()) @@ -719,6 +226,14 @@ public: BlockNode getHeader() const { return Nodes[0]; } bool isIrreducible() const { return NumHeaders > 1; } + HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) { + assert(isHeader(B) && "this is only valid on loop header blocks"); + if (isIrreducible()) + return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) - + Nodes.begin(); + return 0; + } + NodeList::const_iterator members_begin() const { return Nodes.begin() + NumHeaders; } @@ -756,7 +271,7 @@ public: /// loop. /// /// This function should only be called when distributing mass. As long as - /// there are no irreducilbe edges to Node, then it will have complexity + /// there are no irreducible edges to Node, then it will have complexity /// O(1) in this context. /// /// In general, the complexity is O(L), where L is the number of loop @@ -818,6 +333,8 @@ public: BlockNode TargetNode; uint64_t Amount; Weight() : Type(Local), Amount(0) {} + Weight(DistType Type, BlockNode TargetNode, uint64_t Amount) + : Type(Type), TargetNode(TargetNode), Amount(Amount) {} }; /// \brief Distribution of unscaled probability weight. @@ -925,6 +442,16 @@ public: /// \brief Compute the loop scale for a loop. void computeLoopScale(LoopData &Loop); + /// Adjust the mass of all headers in an irreducible loop. + /// + /// Initially, irreducible loops are assumed to distribute their mass + /// equally among its headers. This can lead to wrong frequency estimates + /// since some headers may be executed more frequently than others. + /// + /// This adjusts header mass distribution so it matches the weights of + /// the backedges going into each of the loop headers. + void adjustLoopHeaderMass(LoopData &Loop); + /// \brief Package up a loop. void packageLoop(LoopData &Loop); @@ -946,7 +473,7 @@ public: virtual raw_ostream &print(raw_ostream &OS) const { return OS; } void dump() const { print(dbgs()); } - Float getFloatingBlockFreq(const BlockNode &Node) const; + Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; BlockFrequency getBlockFreq(const BlockNode &Node) const; @@ -1189,6 +716,17 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// - Distribute the mass accordingly, dithering to minimize mass loss, /// as described in \a distributeMass(). /// +/// In the case of irreducible loops, instead of a single loop header, +/// there will be several. The computation of backedge masses is similar +/// but instead of having a single backedge mass, there will be one +/// backedge per loop header. In these cases, each backedge will carry +/// a mass proportional to the edge weights along the corresponding +/// path. +/// +/// At the end of propagation, the full mass assigned to the loop will be +/// distributed among the loop headers proportionally according to the +/// mass flowing through their backedges. +/// /// Finally, calculate the loop scale from the accumulated backedge mass. /// /// 3. Distribute mass in the function (\a computeMassInFunction()). @@ -1212,9 +750,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// /// It has some known flaws. /// -/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting -/// BlockFrequency's 64-bit integer precision. -/// /// - The model of irreducible control flow is a rough approximation. /// /// Modelling irreducible control flow exactly involves setting up and @@ -1232,11 +767,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// as sub-loops, rather than arbitrarily shoving the problematic /// blocks into the headers of the main irreducible SCC. /// -/// - Backedge frequencies are assumed to be evenly split between the -/// headers of a given irreducible SCC. Instead, we could track the -/// backedge mass separately for each header, and adjust their relative -/// frequencies. -/// /// - Entry frequencies are assumed to be evenly split between the /// headers of a given irreducible SCC, which is the only option if we /// need to compute mass in the SCC before its parent loop. Instead, @@ -1343,7 +873,7 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { /// /// \pre \a computeMassInLoop() has been called for each subloop of \c /// OuterLoop. - /// \pre \c Insert points at the the last loop successfully processed by \a + /// \pre \c Insert points at the last loop successfully processed by \a /// computeMassInLoop(). /// \pre \c OuterLoop has irreducible SCCs. void computeIrreducibleMass(LoopData *OuterLoop, @@ -1375,15 +905,15 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { public: const FunctionT *getFunction() const { return F; } - void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI); + void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI); BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {} using BlockFrequencyInfoImplBase::getEntryFreq; BlockFrequency getBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } - Float getFloatingBlockFreq(const BlockT *BB) const { + Scaled64 getFloatingBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); } @@ -1408,13 +938,13 @@ public: }; template -void BlockFrequencyInfoImpl::doFunction(const FunctionT *F, - const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI) { +void BlockFrequencyInfoImpl::calculate(const FunctionT &F, + const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI) { // Save the parameters. - this->BPI = BPI; - this->LI = LI; - this->F = F; + this->BPI = &BPI; + this->LI = &LI; + this->F = &F; // Clean up left-over data structures. BlockFrequencyInfoImplBase::clear(); @@ -1422,12 +952,12 @@ void BlockFrequencyInfoImpl::doFunction(const FunctionT *F, Nodes.clear(); // Initialize. - DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n=================" - << std::string(F->getName().size(), '=') << "\n"); + DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" + << std::string(F.getName().size(), '=') << "\n"); initializeRPOT(); initializeLoops(); - // Visit loops in post-order to find thelocal mass distribution, and then do + // Visit loops in post-order to find the local mass distribution, and then do // the full function. computeMassInLoops(); computeMassInFunction(); @@ -1539,6 +1069,8 @@ bool BlockFrequencyInfoImpl::computeMassInLoop(LoopData &Loop) { for (const BlockNode &M : Loop.Nodes) if (!propagateMassToSuccessors(&Loop, M)) llvm_unreachable("unhandled irreducible control flow"); + + adjustLoopHeaderMass(Loop); } else { Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) @@ -1667,7 +1199,8 @@ raw_ostream &BlockFrequencyInfoImpl::print(raw_ostream &OS) const { OS << "\n"; return OS; } -} + +} // end namespace llvm #undef DEBUG_TYPE