X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FBlockFrequency.cpp;h=6f7e341904b90f85c33ce4c4141ffbae2d7b52c4;hb=e8757c5dbbb50a0ac106f01360c462a3217ef62f;hp=d1f8408dfcbb699776f47eb918735473881fa315;hpb=7cc5f793bc858e4be4fbf532942d14013f123e55;p=oota-llvm.git diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index d1f8408dfcb..6f7e341904b 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -18,94 +18,8 @@ using namespace llvm; -/// Multiply FREQ by N and store result in W array. -static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { - uint64_t u0 = freq & UINT32_MAX; - uint64_t u1 = freq >> 32; - - // Represent 96-bit value as W[2]:W[1]:W[0]; - uint64_t t = u0 * N; - uint64_t k = t >> 32; - W[0] = t; - t = u1 * N + k; - W[1] = t; - W[2] = t >> 32; -} - -/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a -/// 32 bit unsigned integer, we can use a short division algorithm. -static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { - // We assume that W[2] is non-zero since if W[2] is not then the user should - // just use hardware division. - assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " - "zero, the caller should just use 64/32 hardware."); - uint32_t Q[3] = { 0, 0, 0 }; - - // The generalized short division algorithm sets i to m + n - 1, where n is - // the number of words in the divisior and m is the number of words by which - // the divident exceeds the divisor (i.e. m + n == the length of the dividend - // in words). Due to our assumption that W[2] is non-zero, we know that the - // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to - // m + n - 1 = 2 + 1 - 1 = 2. - uint32_t R = 0; - for (int i = 2; i >= 0; --i) { - uint64_t PartialD = uint64_t(R) << 32 | W[i]; - if (PartialD == 0) { - Q[i] = 0; - R = 0; - } else if (PartialD < D) { - Q[i] = 0; - R = uint32_t(PartialD); - } else if (PartialD == D) { - Q[i] = 1; - R = 0; - } else { - Q[i] = uint32_t(PartialD / D); - R = uint32_t(PartialD - (Q[i] * D)); - } - } - - // If Q[2] is non-zero, then we overflowed. - uint64_t Result; - if (Q[2]) { - Result = UINT64_MAX; - R = D; - } else { - // Form the final uint64_t result, avoiding endianness issues. - Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); - } - - if (Rout) - *Rout = R; - - return Result; -} - -uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { - assert(D != 0 && "Division by zero"); - - // Calculate Frequency * N. - uint64_t MulLo = (Frequency & UINT32_MAX) * N; - uint64_t MulHi = (Frequency >> 32) * N; - uint64_t MulRes = (MulHi << 32) + MulLo; - - // If the product fits in 64 bits, just use built-in division. - if (MulHi <= UINT32_MAX && MulRes >= MulLo) { - Frequency = MulRes / D; - return MulRes % D; - } - - // Product overflowed, use 96-bit operations. - // 96-bit value represented as W[2]:W[1]:W[0]. - uint32_t W[3]; - uint32_t R; - mult96bit(Frequency, N, W); - Frequency = divrem96bit(W, D, &R); - return R; -} - BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { - scale(Prob.getNumerator(), Prob.getDenominator()); + Frequency = Prob.scale(Frequency); return *this; } @@ -117,7 +31,7 @@ BlockFrequency::operator*(const BranchProbability &Prob) const { } BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { - scale(Prob.getDenominator(), Prob.getNumerator()); + Frequency = Prob.scaleByInverse(Frequency); return *this; } @@ -145,7 +59,14 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const { return Freq; } -uint32_t BlockFrequency::scale(const BranchProbability &Prob) { - return scale(Prob.getNumerator(), Prob.getDenominator()); -} +BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { + // Frequency can never be 0 by design. + assert(Frequency != 0); + + // Shift right by count. + Frequency >>= count; + // Saturate to 1 if we are 0. + Frequency |= Frequency == 0; + return *this; +}