tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp

   1 //===-- llvm-bcanalyzer.cpp - Byte Code Analyzer --------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Reid Spencer and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This tool may be invoked in the following manner:
  11 //  llvm-bcanalyzer [options]      - Read LLVM bytecode from stdin
  12 //  llvm-bcanalyzer [options] x.bc - Read LLVM bytecode from the x.bc file
  13 //
  14 //  Options:
  15 //      --help      - Output information about command line switches
  16 //      --dump      - Dump low-level bytecode structure in readable format
  17 //
  18 // This tool provides analytical information about a bytecode file. It is
  19 // intended as an aid to developers of bytecode reading and writing software. It
   20 // produces on std::cerr a summary of the bytecode file that shows various
  21 // statistics about the contents of the file. By default this information is
  22 // detailed and contains information about individual bytecode blocks and the
  23 // functions in the module.
  24 // The tool is also able to print a bytecode file in a straight forward text
  25 // format that shows the containment and relationships of the information in
  26 // the bytecode file (-dump option).
  27 //
  28 //===----------------------------------------------------------------------===//
  29
  30 #include "llvm/Analysis/Verifier.h"
  31 #include "llvm/Bitcode/BitstreamReader.h"
  32 #include "llvm/Bitcode/LLVMBitCodes.h"
  33 #include "llvm/Support/CommandLine.h"
  34 #include "llvm/Support/ManagedStatic.h"
  35 #include "llvm/Support/MemoryBuffer.h"
  36 #include "llvm/System/Signals.h"
  37 #include <map>
  38 #include <fstream>
  39 #include <iostream>
  40 #include <algorithm>
  41 using namespace llvm;
  42
  43 static cl::opt<std::string>
  44   InputFilename(cl::Positional, cl::desc("<input bytecode>"), cl::init("-"));
  45
  46 static cl::opt<std::string>
  47   OutputFilename("-o", cl::init("-"), cl::desc("<output file>"));
  48
  49 static cl::opt<bool> Dump("dump", cl::desc("Dump low level bytecode trace"));
  50
  51 //===----------------------------------------------------------------------===//
  52 // Bitcode specific analysis.
  53 //===----------------------------------------------------------------------===//
  54
  55 static cl::opt<bool> NoHistogram("disable-histogram",
  56                                  cl::desc("Do not print per-code histogram"));
  57
  58 static cl::opt<bool>
  59 NonSymbolic("non-symbolic",
  60             cl::desc("Emit numberic info in dump even if"
  61                      " symbolic info is available"));
  62
  63 /// CurStreamType - If we can sniff the flavor of this stream, we can produce
  64 /// better dump info.
  65 static enum {
  66   UnknownBitstream,
  67   LLVMIRBitstream
  68 } CurStreamType;
  69
  70
  71 /// GetBlockName - Return a symbolic block name if known, otherwise return
  72 /// null.
  73 static const char *GetBlockName(unsigned BlockID) {
  74   // Standard blocks for all bitcode files.
  75   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
  76     if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
  77       return "BLOCKINFO_BLOCK";
  78     return 0;
  79   }
  80
  81   if (CurStreamType != LLVMIRBitstream) return 0;
  82
  83   switch (BlockID) {
  84   default:                          return 0;
  85   case bitc::MODULE_BLOCK_ID:       return "MODULE_BLOCK";
  86   case bitc::PARAMATTR_BLOCK_ID:    return "PARAMATTR_BLOCK";
  87   case bitc::TYPE_BLOCK_ID:         return "TYPE_BLOCK";
  88   case bitc::CONSTANTS_BLOCK_ID:    return "CONSTANTS_BLOCK";
  89   case bitc::FUNCTION_BLOCK_ID:     return "FUNCTION_BLOCK";
  90   case bitc::TYPE_SYMTAB_BLOCK_ID:  return "TYPE_SYMTAB";
  91   case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
  92   }
  93 }
  94
  95 /// GetCodeName - Return a symbolic code name if known, otherwise return
  96 /// null.
  97 static const char *GetCodeName(unsigned CodeID, unsigned BlockID) {
  98   // Standard blocks for all bitcode files.
  99   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
 100     if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
 101       switch (CodeID) {
 102       default: return 0;
 103       case bitc::MODULE_CODE_VERSION:     return "VERSION";
 104       }
 105     }
 106     return 0;
 107   }
 108
 109   if (CurStreamType != LLVMIRBitstream) return 0;
 110
 111   switch (BlockID) {
 112   default: return 0;
 113   case bitc::MODULE_BLOCK_ID:
 114     switch (CodeID) {
 115     default: return 0;
 116     case bitc::MODULE_CODE_VERSION:     return "VERSION";
 117     case bitc::MODULE_CODE_TRIPLE:      return "TRIPLE";
 118     case bitc::MODULE_CODE_DATALAYOUT:  return "DATALAYOUT";
 119     case bitc::MODULE_CODE_ASM:         return "ASM";
 120     case bitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME";
 121     case bitc::MODULE_CODE_DEPLIB:      return "DEPLIB";
 122     case bitc::MODULE_CODE_GLOBALVAR:   return "GLOBALVAR";
 123     case bitc::MODULE_CODE_FUNCTION:    return "FUNCTION";
 124     case bitc::MODULE_CODE_ALIAS:       return "ALIAS";
 125     case bitc::MODULE_CODE_PURGEVALS:   return "PURGEVALS";
 126     }
 127   case bitc::PARAMATTR_BLOCK_ID:
 128     switch (CodeID) {
 129     default: return 0;
 130     case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
 131     }
 132   case bitc::TYPE_BLOCK_ID:
 133     switch (CodeID) {
 134     default: return 0;
 135     case bitc::TYPE_CODE_NUMENTRY: return "NUMENTRY";
 136     case bitc::TYPE_CODE_VOID:     return "VOID";
 137     case bitc::TYPE_CODE_FLOAT:    return "FLOAT";
 138     case bitc::TYPE_CODE_DOUBLE:   return "DOUBLE";
 139     case bitc::TYPE_CODE_LABEL:    return "LABEL";
 140     case bitc::TYPE_CODE_OPAQUE:   return "OPAQUE";
 141     case bitc::TYPE_CODE_INTEGER:  return "INTEGER";
 142     case bitc::TYPE_CODE_POINTER:  return "POINTER";
 143     case bitc::TYPE_CODE_FUNCTION: return "FUNCTION";
 144     case bitc::TYPE_CODE_STRUCT:   return "STRUCT";
 145     case bitc::TYPE_CODE_ARRAY:    return "ARRAY";
 146     case bitc::TYPE_CODE_VECTOR:   return "VECTOR";
 147     }
 148
 149   case bitc::CONSTANTS_BLOCK_ID:
 150     switch (CodeID) {
 151     default: return 0;
 152     case bitc::CST_CODE_SETTYPE:       return "SETTYPE";
 153     case bitc::CST_CODE_NULL:          return "NULL";
 154     case bitc::CST_CODE_UNDEF:         return "UNDEF";
 155     case bitc::CST_CODE_INTEGER:       return "INTEGER";
 156     case bitc::CST_CODE_WIDE_INTEGER:  return "WIDE_INTEGER";
 157     case bitc::CST_CODE_FLOAT:         return "FLOAT";
 158     case bitc::CST_CODE_AGGREGATE:     return "AGGREGATE";
 159     case bitc::CST_CODE_STRING:        return "STRING";
 160     case bitc::CST_CODE_CSTRING:       return "CSTRING";
 161     case bitc::CST_CODE_CE_BINOP:      return "CE_BINOP";
 162     case bitc::CST_CODE_CE_CAST:       return "CE_CAST";
 163     case bitc::CST_CODE_CE_GEP:        return "CE_GEP";
 164     case bitc::CST_CODE_CE_SELECT:     return "CE_SELECT";
 165     case bitc::CST_CODE_CE_EXTRACTELT: return "CE_EXTRACTELT";
 166     case bitc::CST_CODE_CE_INSERTELT:  return "CE_INSERTELT";
 167     case bitc::CST_CODE_CE_SHUFFLEVEC: return "CE_SHUFFLEVEC";
 168     case bitc::CST_CODE_CE_CMP:        return "CE_CMP";
 169     case bitc::CST_CODE_INLINEASM:     return "INLINEASM";
 170     }
 171   case bitc::FUNCTION_BLOCK_ID:
 172     switch (CodeID) {
 173     default: return 0;
 174     case bitc::FUNC_CODE_DECLAREBLOCKS: return "DECLAREBLOCKS";
 175
 176     case bitc::FUNC_CODE_INST_BINOP:       return "INST_BINOP";
 177     case bitc::FUNC_CODE_INST_CAST:        return "INST_CAST";
 178     case bitc::FUNC_CODE_INST_GEP:         return "INST_GEP";
 179     case bitc::FUNC_CODE_INST_SELECT:      return "INST_SELECT";
 180     case bitc::FUNC_CODE_INST_EXTRACTELT:  return "INST_EXTRACTELT";
 181     case bitc::FUNC_CODE_INST_INSERTELT:   return "INST_INSERTELT";
 182     case bitc::FUNC_CODE_INST_SHUFFLEVEC:  return "INST_SHUFFLEVEC";
 183     case bitc::FUNC_CODE_INST_CMP:         return "INST_CMP";
 184
 185     case bitc::FUNC_CODE_INST_RET:         return "INST_RET";
 186     case bitc::FUNC_CODE_INST_BR:          return "INST_BR";
 187     case bitc::FUNC_CODE_INST_SWITCH:      return "INST_SWITCH";
 188     case bitc::FUNC_CODE_INST_INVOKE:      return "INST_INVOKE";
 189     case bitc::FUNC_CODE_INST_UNWIND:      return "INST_UNWIND";
 190     case bitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE";
 191
 192     case bitc::FUNC_CODE_INST_PHI:         return "INST_PHI";
 193     case bitc::FUNC_CODE_INST_MALLOC:      return "INST_MALLOC";
 194     case bitc::FUNC_CODE_INST_FREE:        return "INST_FREE";
 195     case bitc::FUNC_CODE_INST_ALLOCA:      return "INST_ALLOCA";
 196     case bitc::FUNC_CODE_INST_LOAD:        return "INST_LOAD";
 197     case bitc::FUNC_CODE_INST_STORE:       return "INST_STORE";
 198     case bitc::FUNC_CODE_INST_CALL:        return "INST_CALL";
 199     case bitc::FUNC_CODE_INST_VAARG:       return "INST_VAARG";
 200     }
 201   case bitc::TYPE_SYMTAB_BLOCK_ID:
 202     switch (CodeID) {
 203     default: return 0;
 204     case bitc::TST_CODE_ENTRY: return "ENTRY";
 205     }
 206   case bitc::VALUE_SYMTAB_BLOCK_ID:
 207     switch (CodeID) {
 208     default: return 0;
 209     case bitc::VST_CODE_ENTRY: return "ENTRY";
 210     case bitc::VST_CODE_BBENTRY: return "BBENTRY";
 211     }
 212   }
 213 }
 214
 215
 216 struct PerBlockIDStats {
 217   /// NumInstances - This the number of times this block ID has been seen.
 218   unsigned NumInstances;
 219
 220   /// NumBits - The total size in bits of all of these blocks.
 221   uint64_t NumBits;
 222
 223   /// NumSubBlocks - The total number of blocks these blocks contain.
 224   unsigned NumSubBlocks;
 225
 226   /// NumAbbrevs - The total number of abbreviations.
 227   unsigned NumAbbrevs;
 228
 229   /// NumRecords - The total number of records these blocks contain, and the
 230   /// number that are abbreviated.
 231   unsigned NumRecords, NumAbbreviatedRecords;
 232
 233   /// CodeFreq - Keep track of the number of times we see each code.
 234   std::vector<unsigned> CodeFreq;
 235
 236   PerBlockIDStats()
 237     : NumInstances(0), NumBits(0),
 238       NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
 239 };
 240
 241 static std::map<unsigned, PerBlockIDStats> BlockIDStats;
 242
 243
 244
 245 /// Error - All bitcode analysis errors go through this function, making this a
 246 /// good place to breakpoint if debugging.
 247 static bool Error(const std::string &Err) {
 248   std::cerr << Err << "\n";
 249   return true;
 250 }
 251
 252 /// ParseBlock - Read a block, updating statistics, etc.
 253 static bool ParseBlock(BitstreamReader &Stream, unsigned IndentLevel) {
 254   std::string Indent(IndentLevel*2, ' ');
 255   uint64_t BlockBitStart = Stream.GetCurrentBitNo();
 256   unsigned BlockID = Stream.ReadSubBlockID();
 257
 258   // Get the statistics for this BlockID.
 259   PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
 260
 261   BlockStats.NumInstances++;
 262
 263   // BLOCKINFO is a special part of the stream.
 264   if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
 265     if (Dump) std::cerr << Indent << "<BLOCKINFO_BLOCK/>\n";
 266     if (Stream.ReadBlockInfoBlock())
 267       return Error("Malformed BlockInfoBlock");
 268     uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
 269     BlockStats.NumBits += BlockBitEnd-BlockBitStart;
 270     return false;
 271   }
 272
 273   unsigned NumWords = 0;
 274   if (Stream.EnterSubBlock(BlockID, &NumWords))
 275     return Error("Malformed block record");
 276
 277   const char *BlockName = 0;
 278   if (Dump) {
 279     std::cerr << Indent << "<";
 280     if ((BlockName = GetBlockName(BlockID)))
 281       std::cerr << BlockName;
 282     else
 283       std::cerr << "UnknownBlock" << BlockID;
 284
 285     if (NonSymbolic && BlockName)
 286       std::cerr << " BlockID=" << BlockID;
 287
 288     std::cerr << " NumWords=" << NumWords
 289               << " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
 290   }
 291
 292   SmallVector<uint64_t, 64> Record;
 293
 294   // Read all the records for this block.
 295   while (1) {
 296     if (Stream.AtEndOfStream())
 297       return Error("Premature end of bitstream");
 298
 299     // Read the code for this record.
 300     unsigned AbbrevID = Stream.ReadCode();
 301     switch (AbbrevID) {
 302     case bitc::END_BLOCK: {
 303       if (Stream.ReadBlockEnd())
 304         return Error("Error at end of block");
 305       uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
 306       BlockStats.NumBits += BlockBitEnd-BlockBitStart;
 307       if (Dump) {
 308         std::cerr << Indent << "</";
 309         if (BlockName)
 310           std::cerr << BlockName << ">\n";
 311         else
 312           std::cerr << "UnknownBlock" << BlockID << ">\n";
 313       }
 314       return false;
 315     }
 316     case bitc::ENTER_SUBBLOCK: {
 317       uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
 318       if (ParseBlock(Stream, IndentLevel+1))
 319         return true;
 320       ++BlockStats.NumSubBlocks;
 321       uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
 322
 323       // Don't include subblock sizes in the size of this block.
 324       BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
 325       break;
 326     }
 327     case bitc::DEFINE_ABBREV:
 328       Stream.ReadAbbrevRecord();
 329       ++BlockStats.NumAbbrevs;
 330       break;
 331     default:
 332       ++BlockStats.NumRecords;
 333       if (AbbrevID != bitc::UNABBREV_RECORD)
 334         ++BlockStats.NumAbbreviatedRecords;
 335
 336       Record.clear();
 337       unsigned Code = Stream.ReadRecord(AbbrevID, Record);
 338
 339       // Increment the # occurrences of this code.
 340       if (BlockStats.CodeFreq.size() <= Code)
 341         BlockStats.CodeFreq.resize(Code+1);
 342       BlockStats.CodeFreq[Code]++;
 343
 344       if (Dump) {
 345         std::cerr << Indent << "  <";
 346         if (const char *CodeName = GetCodeName(Code, BlockID))
 347           std::cerr << CodeName;
 348         else
 349           std::cerr << "UnknownCode" << Code;
 350         if (NonSymbolic && GetCodeName(Code, BlockID))
 351           std::cerr << " codeid=" << Code;
 352         if (AbbrevID != bitc::UNABBREV_RECORD)
 353           std::cerr << " abbrevid=" << AbbrevID;
 354
 355         for (unsigned i = 0, e = Record.size(); i != e; ++i)
 356           std::cerr << " op" << i << "=" << (int64_t)Record[i];
 357
 358         std::cerr << "/>\n";
 359       }
 360
 361       break;
 362     }
 363   }
 364 }
 365
 366 static void PrintSize(double Bits) {
 367   std::cerr << Bits << "b/" << Bits/8 << "B/" << Bits/32 << "W";
 368 }
 369
 370
 371 /// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
 372 static int AnalyzeBitcode() {
 373   // Read the input file.
 374   MemoryBuffer *Buffer;
 375   if (InputFilename == "-")
 376     Buffer = MemoryBuffer::getSTDIN();
 377   else
 378     Buffer = MemoryBuffer::getFile(&InputFilename[0], InputFilename.size());
 379
 380   if (Buffer == 0)
 381     return Error("Error reading '" + InputFilename + "'.");
 382
 383   if (Buffer->getBufferSize() & 3)
 384     return Error("Bitcode stream should be a multiple of 4 bytes in length");
 385
 386   unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
 387   BitstreamReader Stream(BufPtr, BufPtr+Buffer->getBufferSize());
 388
 389
 390   // Read the stream signature.
 391   char Signature[6];
 392   Signature[0] = Stream.Read(8);
 393   Signature[1] = Stream.Read(8);
 394   Signature[2] = Stream.Read(4);
 395   Signature[3] = Stream.Read(4);
 396   Signature[4] = Stream.Read(4);
 397   Signature[5] = Stream.Read(4);
 398
 399   // Autodetect the file contents, if it is one we know.
 400   CurStreamType = UnknownBitstream;
 401   if (Signature[0] == 'B' && Signature[1] == 'C' &&
 402       Signature[2] == 0x0 && Signature[3] == 0xC &&
 403       Signature[4] == 0xE && Signature[5] == 0xD)
 404     CurStreamType = LLVMIRBitstream;
 405
 406   unsigned NumTopBlocks = 0;
 407
 408   // Parse the top-level structure.  We only allow blocks at the top-level.
 409   while (!Stream.AtEndOfStream()) {
 410     unsigned Code = Stream.ReadCode();
 411     if (Code != bitc::ENTER_SUBBLOCK)
 412       return Error("Invalid record at top-level");
 413
 414     if (ParseBlock(Stream, 0))
 415       return true;
 416     ++NumTopBlocks;
 417   }
 418
 419   if (Dump) std::cerr << "\n\n";
 420
 421   uint64_t BufferSizeBits = Buffer->getBufferSize()*8;
 422   // Print a summary of the read file.
 423   std::cerr << "Summary of " << InputFilename << ":\n";
 424   std::cerr << "         Total size: ";
 425   PrintSize(BufferSizeBits);
 426   std::cerr << "\n";
 427   std::cerr << "        Stream type: ";
 428   switch (CurStreamType) {
 429   default: assert(0 && "Unknown bitstream type");
 430   case UnknownBitstream: std::cerr << "unknown\n"; break;
 431   case LLVMIRBitstream:  std::cerr << "LLVM IR\n"; break;
 432   }
 433   std::cerr << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
 434   std::cerr << "\n";
 435
 436   // Emit per-block stats.
 437   std::cerr << "Per-block Summary:\n";
 438   for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
 439        E = BlockIDStats.end(); I != E; ++I) {
 440     std::cerr << "  Block ID #" << I->first;
 441     if (const char *BlockName = GetBlockName(I->first))
 442       std::cerr << " (" << BlockName << ")";
 443     std::cerr << ":\n";
 444
 445     const PerBlockIDStats &Stats = I->second;
 446     std::cerr << "      Num Instances: " << Stats.NumInstances << "\n";
 447     std::cerr << "         Total Size: ";
 448     PrintSize(Stats.NumBits);
 449     std::cerr << "\n";
 450     std::cerr << "          % of file: "
 451               << Stats.NumBits/(double)BufferSizeBits*100 << "\n";
 452     if (Stats.NumInstances > 1) {
 453       std::cerr << "       Average Size: ";
 454       PrintSize(Stats.NumBits/(double)Stats.NumInstances);
 455       std::cerr << "\n";
 456       std::cerr << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
 457                 << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
 458       std::cerr << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
 459                 << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
 460       std::cerr << "    Tot/Avg Records: " << Stats.NumRecords << "/"
 461                 << Stats.NumRecords/(double)Stats.NumInstances << "\n";
 462     } else {
 463       std::cerr << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
 464       std::cerr << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
 465       std::cerr << "        Num Records: " << Stats.NumRecords << "\n";
 466     }
 467     if (Stats.NumRecords)
 468       std::cerr << "      % Abbrev Recs: " << (Stats.NumAbbreviatedRecords/
 469                    (double)Stats.NumRecords)*100 << "\n";
 470     std::cerr << "\n";
 471
 472     // Print a histogram of the codes we see.
 473     if (!NoHistogram && !Stats.CodeFreq.empty()) {
 474       std::vector<std::pair<unsigned, unsigned> > FreqPairs;  // <freq,code>
 475       for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
 476         if (unsigned Freq = Stats.CodeFreq[i])
 477           FreqPairs.push_back(std::make_pair(Freq, i));
 478       std::stable_sort(FreqPairs.begin(), FreqPairs.end());
 479       std::reverse(FreqPairs.begin(), FreqPairs.end());
 480
 481       std::cerr << "\tCode Histogram:\n";
 482       for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
 483         std::cerr << "\t\t" << FreqPairs[i].first << "\t";
 484         if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first))
 485           std::cerr << CodeName << "\n";
 486         else
 487           std::cerr << "UnknownCode" << FreqPairs[i].second << "\n";
 488       }
 489       std::cerr << "\n";
 490
 491     }
 492   }
 493   return 0;
 494 }
 495
 496
 497 int main(int argc, char **argv) {
 498   llvm_shutdown_obj X;  // Call llvm_shutdown() on exit.
 499   cl::ParseCommandLineOptions(argc, argv, " llvm-bcanalyzer file analyzer\n");
 500
 501   sys::PrintStackTraceOnErrorSignal();
 502
 503   return AnalyzeBitcode();
 504 }