#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
}
struct PerRecordStats {
+ /// The number of times this record code has been seen.
unsigned NumInstances;
+ /// The number of times this record code has been seen abbreviated.
unsigned NumAbbrev;
+ /// The total number of bits used for this record on disk.
uint64_t TotalBits;
-
- PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
+ /// The number of bits that would have been used if no abbrevs were used.
+ uint64_t UnabbrevBits;
+ /// The number of bits that would be used if "good" abbreviations were used.
+ uint64_t GoodAbbrevBits;
+
+ PerRecordStats()
+ : NumInstances(0), NumAbbrev(0), TotalBits(0), UnabbrevBits(0),
+ GoodAbbrevBits(0) {}
};
struct PerBlockIDStats {
return true;
}
+static unsigned computeVBR6Bits(uint64_t Val) {
+ unsigned Bits = 0;
+ do {
+ Bits += 6;
+ } while (Val >>= 5);
+ return Bits;
+}
+
+static void addBlobSize(uint64_t &Bits, StringRef Blob) {
+ // Blob size is always VBR6.
+ Bits += computeVBR6Bits(Blob.size());
+
+ // Blob is always 32-bit aligned, and padded to a multiple of 32 bits.
+ RoundUpToAlignment(Bits, 32);
+ Bits += Blob.size() * 8;
+ RoundUpToAlignment(Bits, 32);
+}
+
+/// \brief Compute the number of bits that would be used by the record if it
+/// were unabbreviated.
+static uint64_t computeUnabbrevBits(unsigned AbbrevIDWidth, unsigned Code,
+ ArrayRef<uint64_t> Record, StringRef Blob) {
+ uint64_t Bits =
+ AbbrevIDWidth + computeVBR6Bits(Code) + computeVBR6Bits(Record.size());
+ // Use VBR6 for all fields.
+ for (uint64_t Val : Record)
+ Bits += computeVBR6Bits(Val);
+ // Assume Blob representation for the blob, even though a Blob cannot
+ // be unabbreviated.
+ if (!Blob.empty())
+ addBlobSize(Bits, Blob);
+ return Bits;
+}
+
+/// \brief Compute the number of bits that would be used by the record if a
+/// "good" abbreviation were used. We use an extremely simple heuristic for
+/// "good"ness: pick the best abbrev that uses a Literal for the record code,
+/// a normal Blob field for the blob (if present), and a minimal-width Fixed
+/// field for everything else.
+static uint64_t computeGoodAbbrevBits(unsigned AbbrevIDWidth, unsigned Code,
+ ArrayRef<uint64_t> Record,
+ StringRef Blob) {
+ uint64_t Bits = AbbrevIDWidth;
+ // Use Fixed for all fields (other than the record code).
+ for (uint64_t Val : Record)
+ Bits += 64 - llvm::countLeadingZeros(Val);
+ // Assume Blob representation for the blob.
+ if (!Blob.empty())
+ addBlobSize(Bits, Blob);
+ return Bits;
+}
+
/// ParseBlock - Read a block, updating statistics, etc.
static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
unsigned IndentLevel, CurStreamTypeType CurStreamType) {
BlockStats.CodeFreq[Code].NumInstances++;
BlockStats.CodeFreq[Code].TotalBits +=
Stream.GetCurrentBitNo()-RecordStartBit;
+ BlockStats.CodeFreq[Code].UnabbrevBits +=
+ computeUnabbrevBits(Stream.getAbbrevIDWidth(), Code, Record, Blob);
+ BlockStats.CodeFreq[Code].GoodAbbrevBits +=
+ computeGoodAbbrevBits(Stream.getAbbrevIDWidth(), Code, Record, Blob);
if (Entry.ID != bitc::UNABBREV_RECORD) {
BlockStats.CodeFreq[Code].NumAbbrev++;
++BlockStats.NumAbbreviatedRecords;
// Print a histogram of the codes we see.
if (!NoHistogram && !Stats.CodeFreq.empty()) {
- std::vector<std::pair<unsigned, unsigned> > FreqPairs; // <freq,code>
+ std::vector<std::pair<uint64_t, unsigned> > FreqPairs; // <bits,code>
for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
- if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
+ if (unsigned Freq = Stats.CodeFreq[i].TotalBits)
FreqPairs.push_back(std::make_pair(Freq, i));
std::stable_sort(FreqPairs.begin(), FreqPairs.end());
std::reverse(FreqPairs.begin(), FreqPairs.end());
outs() << "\tRecord Histogram:\n";
- outs() << "\t\t Count # Bits %% Abv Record Kind\n";
+ outs() << "\t\t Count # Bits % Abv % Cmp (cur/best) Record Kind\n";
for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
- outs() << format("\t\t%7d %9lu",
+ outs() << format("\t\t%7d %9lu ",
RecStats.NumInstances,
(unsigned long)RecStats.TotalBits);
if (RecStats.NumAbbrev)
outs() <<
- format("%7.2f ",
+ format("%7.2f ",
(double)RecStats.NumAbbrev/RecStats.NumInstances*100);
else
- outs() << " ";
+ outs() << " ";
+
+ if (RecStats.UnabbrevBits)
+ outs() << format(
+ "%7.2f / %7.2f ",
+ (double)RecStats.TotalBits / RecStats.UnabbrevBits * 100,
+ (double)RecStats.GoodAbbrevBits / RecStats.UnabbrevBits * 100);
+ else
+ outs() << " ";
if (const char *CodeName =
GetCodeName(FreqPairs[i].second, I->first, StreamFile,