#define DEBUG_TYPE "block-freq"
-//===----------------------------------------------------------------------===//
-//
-// BlockMass definition.
-//
-// TODO: Make this private to BlockFrequencyInfoImpl or delete.
-//
-//===----------------------------------------------------------------------===//
namespace llvm {
+class BasicBlock;
+class BranchProbabilityInfo;
+class Function;
+class Loop;
+class LoopInfo;
+class MachineBasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoop;
+class MachineLoopInfo;
+
+namespace bfi_detail {
+
+struct IrreducibleGraph;
+
+// This is part of a workaround for a GCC 4.7 crash on lambdas.
+template <class BT> struct BlockEdgesAdder;
+
/// \brief Mass of a block.
///
/// This class implements a sort of fixed-point fraction always between 0.0 and
return X.print(OS);
}
-template <> struct isPodLike<BlockMass> {
+} // end namespace bfi_detail
+
+template <> struct isPodLike<bfi_detail::BlockMass> {
static const bool value = true;
};
-} // end namespace llvm
-
-//===----------------------------------------------------------------------===//
-//
-// BlockFrequencyInfoImpl definition.
-//
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-class BasicBlock;
-class BranchProbabilityInfo;
-class Function;
-class Loop;
-class LoopInfo;
-class MachineBasicBlock;
-class MachineBranchProbabilityInfo;
-class MachineFunction;
-class MachineLoop;
-class MachineLoopInfo;
-
-namespace bfi_detail {
-struct IrreducibleGraph;
-
-// This is part of a workaround for a GCC 4.7 crash on lambdas.
-template <class BT> struct BlockEdgesAdder;
-}
-
/// \brief Base class for BlockFrequencyInfoImpl
///
/// BlockFrequencyInfoImplBase has supporting data structures and some
class BlockFrequencyInfoImplBase {
public:
typedef ScaledNumber<uint64_t> Scaled64;
+ typedef bfi_detail::BlockMass BlockMass;
/// \brief Representative of a block.
///
/// \brief Data about a loop.
///
- /// Contains the data necessary to represent represent a loop as a
- /// pseudo-node once it's packaged.
+ /// Contains the data necessary to represent a loop as a pseudo-node once it's
+ /// packaged.
struct LoopData {
typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
typedef SmallVector<BlockNode, 4> NodeList;
- LoopData *Parent; ///< The parent loop.
- bool IsPackaged; ///< Whether this has been packaged.
- uint32_t NumHeaders; ///< Number of headers.
- ExitMap Exits; ///< Successor edges (and weights).
- NodeList Nodes; ///< Header and the members of the loop.
- BlockMass BackedgeMass; ///< Mass returned to loop header.
+ typedef SmallVector<BlockMass, 1> HeaderMassList;
+ LoopData *Parent; ///< The parent loop.
+ bool IsPackaged; ///< Whether this has been packaged.
+ uint32_t NumHeaders; ///< Number of headers.
+ ExitMap Exits; ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop.
+ HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
BlockMass Mass;
Scaled64 Scale;
LoopData(LoopData *Parent, const BlockNode &Header)
- : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {}
+ : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header),
+ BackedgeMass(1) {}
template <class It1, class It2>
LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
It2 LastOther)
: Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
NumHeaders = Nodes.size();
Nodes.insert(Nodes.end(), FirstOther, LastOther);
+ BackedgeMass.resize(NumHeaders);
}
bool isHeader(const BlockNode &Node) const {
if (isIrreducible())
BlockNode getHeader() const { return Nodes[0]; }
bool isIrreducible() const { return NumHeaders > 1; }
+ HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
+ assert(isHeader(B) && "this is only valid on loop header blocks");
+ if (isIrreducible())
+ return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
+ Nodes.begin();
+ return 0;
+ }
+
NodeList::const_iterator members_begin() const {
return Nodes.begin() + NumHeaders;
}
/// loop.
///
/// This function should only be called when distributing mass. As long as
- /// there are no irreducilbe edges to Node, then it will have complexity
+ /// there are no irreducible edges to Node, then it will have complexity
/// O(1) in this context.
///
/// In general, the complexity is O(L), where L is the number of loop
BlockNode TargetNode;
uint64_t Amount;
Weight() : Type(Local), Amount(0) {}
+ Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
+ : Type(Type), TargetNode(TargetNode), Amount(Amount) {}
};
/// \brief Distribution of unscaled probability weight.
/// \brief Compute the loop scale for a loop.
void computeLoopScale(LoopData &Loop);
+ /// Adjust the mass of all headers in an irreducible loop.
+ ///
+ /// Initially, irreducible loops are assumed to distribute their mass
+ /// equally among its headers. This can lead to wrong frequency estimates
+ /// since some headers may be executed more frequently than others.
+ ///
+ /// This adjusts header mass distribution so it matches the weights of
+ /// the backedges going into each of the loop headers.
+ void adjustLoopHeaderMass(LoopData &Loop);
+
/// \brief Package up a loop.
void packageLoop(LoopData &Loop);
else
addBlockEdges(*this, Irr, OuterLoop);
}
-}
+} // namespace bfi_detail
/// \brief Shared implementation for block frequency analysis.
///
/// - Distribute the mass accordingly, dithering to minimize mass loss,
/// as described in \a distributeMass().
///
+/// In the case of irreducible loops, instead of a single loop header,
+/// there will be several. The computation of backedge masses is similar
+/// but instead of having a single backedge mass, there will be one
+/// backedge per loop header. In these cases, each backedge will carry
+/// a mass proportional to the edge weights along the corresponding
+/// path.
+///
+/// At the end of propagation, the full mass assigned to the loop will be
+/// distributed among the loop headers proportionally according to the
+/// mass flowing through their backedges.
+///
/// Finally, calculate the loop scale from the accumulated backedge mass.
///
/// 3. Distribute mass in the function (\a computeMassInFunction()).
///
/// It has some known flaws.
///
-/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting
-/// BlockFrequency's 64-bit integer precision.
-///
/// - The model of irreducible control flow is a rough approximation.
///
/// Modelling irreducible control flow exactly involves setting up and
/// as sub-loops, rather than arbitrarily shoving the problematic
/// blocks into the headers of the main irreducible SCC.
///
-/// - Backedge frequencies are assumed to be evenly split between the
-/// headers of a given irreducible SCC. Instead, we could track the
-/// backedge mass separately for each header, and adjust their relative
-/// frequencies.
-///
/// - Entry frequencies are assumed to be evenly split between the
/// headers of a given irreducible SCC, which is the only option if we
/// need to compute mass in the SCC before its parent loop. Instead,
///
/// \pre \a computeMassInLoop() has been called for each subloop of \c
/// OuterLoop.
- /// \pre \c Insert points at the the last loop successfully processed by \a
+ /// \pre \c Insert points at the last loop successfully processed by \a
/// computeMassInLoop().
/// \pre \c OuterLoop has irreducible SCCs.
void computeIrreducibleMass(LoopData *OuterLoop,
initializeRPOT();
initializeLoops();
- // Visit loops in post-order to find thelocal mass distribution, and then do
+ // Visit loops in post-order to find the local mass distribution, and then do
// the full function.
computeMassInLoops();
computeMassInFunction();
for (const BlockNode &M : Loop.Nodes)
if (!propagateMassToSuccessors(&Loop, M))
llvm_unreachable("unhandled irreducible control flow");
+
+ adjustLoopHeaderMass(Loop);
} else {
Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
G.addEdge(Irr, BFI.getNode(*I), OuterLoop);
}
};
-}
+} // namespace bfi_detail
template <class BT>
void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {