include/llvm/Bitcode/BitstreamReader.h

   1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamReader class.  This class can be used to
  11 // read an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
  16 #define LLVM_BITCODE_BITSTREAMREADER_H
  17
  18 #include "llvm/Bitcode/BitCodes.h"
  19 #include "llvm/Support/Endian.h"
  20 #include "llvm/Support/StreamableMemoryObject.h"
  21 #include <climits>
  22 #include <string>
  23 #include <vector>
  24
  25 namespace llvm {
  26
  27   class Deserializer;
  28
  29 /// BitstreamReader - This class is used to read from an LLVM bitcode stream,
  30 /// maintaining information that is global to decoding the entire file.  While
  31 /// a file is being read, multiple cursors can be independently advanced or
  32 /// skipped around within the file.  These are represented by the
  33 /// BitstreamCursor class.
  34 class BitstreamReader {
  35 public:
  36   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  37   /// These describe abbreviations that all blocks of the specified ID inherit.
  38   struct BlockInfo {
  39     unsigned BlockID;
  40     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
  41     std::string Name;
  42
  43     std::vector<std::pair<unsigned, std::string> > RecordNames;
  44   };
  45 private:
  46   std::unique_ptr<StreamableMemoryObject> BitcodeBytes;
  47
  48   std::vector<BlockInfo> BlockInfoRecords;
  49
  50   /// IgnoreBlockInfoNames - This is set to true if we don't care about the
  51   /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
  52   /// uses this.
  53   bool IgnoreBlockInfoNames;
  54
  55   BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION;
  56   void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION;
  57 public:
  58   BitstreamReader() : IgnoreBlockInfoNames(true) {
  59   }
  60
  61   BitstreamReader(const unsigned char *Start, const unsigned char *End)
  62       : IgnoreBlockInfoNames(true) {
  63     init(Start, End);
  64   }
  65
  66   BitstreamReader(StreamableMemoryObject *bytes) : IgnoreBlockInfoNames(true) {
  67     BitcodeBytes.reset(bytes);
  68   }
  69
  70   BitstreamReader(BitstreamReader &&Other) {
  71     *this = std::move(Other);
  72   }
  73
  74   BitstreamReader &operator=(BitstreamReader &&Other) {
  75     BitcodeBytes = std::move(Other.BitcodeBytes);
  76     // Explicitly swap block info, so that nothing gets destroyed twice.
  77     std::swap(BlockInfoRecords, Other.BlockInfoRecords);
  78     IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
  79     return *this;
  80   }
  81
  82   void init(const unsigned char *Start, const unsigned char *End) {
  83     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
  84     BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
  85   }
  86
  87   StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
  88
  89   /// CollectBlockInfoNames - This is called by clients that want block/record
  90   /// name information.
  91   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
  92   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
  93
  94   //===--------------------------------------------------------------------===//
  95   // Block Manipulation
  96   //===--------------------------------------------------------------------===//
  97
  98   /// hasBlockInfoRecords - Return true if we've already read and processed the
  99   /// block info block for this Bitstream.  We only process it for the first
 100   /// cursor that walks over it.
 101   bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
 102
 103   /// getBlockInfo - If there is block info for the specified ID, return it,
 104   /// otherwise return null.
 105   const BlockInfo *getBlockInfo(unsigned BlockID) const {
 106     // Common case, the most recent entry matches BlockID.
 107     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 108       return &BlockInfoRecords.back();
 109
 110     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 111          i != e; ++i)
 112       if (BlockInfoRecords[i].BlockID == BlockID)
 113         return &BlockInfoRecords[i];
 114     return nullptr;
 115   }
 116
 117   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 118     if (const BlockInfo *BI = getBlockInfo(BlockID))
 119       return *const_cast<BlockInfo*>(BI);
 120
 121     // Otherwise, add a new record.
 122     BlockInfoRecords.push_back(BlockInfo());
 123     BlockInfoRecords.back().BlockID = BlockID;
 124     return BlockInfoRecords.back();
 125   }
 126
 127   /// Takes block info from the other bitstream reader.
 128   ///
 129   /// This is a "take" operation because BlockInfo records are non-trivial, and
 130   /// indeed rather expensive.
 131   void takeBlockInfo(BitstreamReader &&Other) {
 132     assert(!hasBlockInfoRecords());
 133     BlockInfoRecords = std::move(Other.BlockInfoRecords);
 134   }
 135 };
 136
 137
 138 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can
 139 /// discover a few different kinds of entries:
 140 ///   Error    - Malformed bitcode was found.
 141 ///   EndBlock - We've reached the end of the current block, (or the end of the
 142 ///              file, which is treated like a series of EndBlock records.
 143 ///   SubBlock - This is the start of a new subblock of a specific ID.
 144 ///   Record   - This is a record with a specific AbbrevID.
 145 ///
 146 struct BitstreamEntry {
 147   enum {
 148     Error,
 149     EndBlock,
 150     SubBlock,
 151     Record
 152   } Kind;
 153
 154   unsigned ID;
 155
 156   static BitstreamEntry getError() {
 157     BitstreamEntry E; E.Kind = Error; return E;
 158   }
 159   static BitstreamEntry getEndBlock() {
 160     BitstreamEntry E; E.Kind = EndBlock; return E;
 161   }
 162   static BitstreamEntry getSubBlock(unsigned ID) {
 163     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
 164   }
 165   static BitstreamEntry getRecord(unsigned AbbrevID) {
 166     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
 167   }
 168 };
 169
 170 /// BitstreamCursor - This represents a position within a bitcode file.  There
 171 /// may be multiple independent cursors reading within one bitstream, each
 172 /// maintaining their own local state.
 173 ///
 174 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
 175 /// be passed by value.
 176 class BitstreamCursor {
 177   friend class Deserializer;
 178   BitstreamReader *BitStream;
 179   size_t NextChar;
 180
 181
 182   /// CurWord/word_t - This is the current data we have pulled from the stream
 183   /// but have not returned to the client.  This is specifically and
 184   /// intentionally defined to follow the word size of the host machine for
 185   /// efficiency.  We use word_t in places that are aware of this to make it
 186   /// perfectly explicit what is going on.
 187   typedef uint32_t word_t;
 188   word_t CurWord;
 189
 190   /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
 191   /// is always from [0...31/63] inclusive (depending on word size).
 192   unsigned BitsInCurWord;
 193
 194   // CurCodeSize - This is the declared size of code values used for the current
 195   // block, in bits.
 196   unsigned CurCodeSize;
 197
 198   /// CurAbbrevs - Abbrevs installed at in this block.
 199   std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
 200
 201   struct Block {
 202     unsigned PrevCodeSize;
 203     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
 204     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
 205   };
 206
 207   /// BlockScope - This tracks the codesize of parent blocks.
 208   SmallVector<Block, 8> BlockScope;
 209
 210
 211 public:
 212   BitstreamCursor() : BitStream(nullptr), NextChar(0) {}
 213
 214   explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
 215     NextChar = 0;
 216     CurWord = 0;
 217     BitsInCurWord = 0;
 218     CurCodeSize = 2;
 219   }
 220
 221   void init(BitstreamReader &R) {
 222     freeState();
 223
 224     BitStream = &R;
 225     NextChar = 0;
 226     CurWord = 0;
 227     BitsInCurWord = 0;
 228     CurCodeSize = 2;
 229   }
 230
 231   void freeState();
 232
 233   bool isEndPos(size_t pos) {
 234     return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
 235   }
 236
 237   bool canSkipToPos(size_t pos) const {
 238     // pos can be skipped to if it is a valid address or one byte past the end.
 239     return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
 240         static_cast<uint64_t>(pos - 1));
 241   }
 242
 243   uint32_t getWord(size_t pos) {
 244     uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
 245     BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf);
 246     return *reinterpret_cast<support::ulittle32_t *>(buf);
 247   }
 248
 249   bool AtEndOfStream() {
 250     return BitsInCurWord == 0 && isEndPos(NextChar);
 251   }
 252
 253   /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
 254   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
 255
 256   /// GetCurrentBitNo - Return the bit # of the bit we are reading.
 257   uint64_t GetCurrentBitNo() const {
 258     return NextChar*CHAR_BIT - BitsInCurWord;
 259   }
 260
 261   BitstreamReader *getBitStreamReader() {
 262     return BitStream;
 263   }
 264   const BitstreamReader *getBitStreamReader() const {
 265     return BitStream;
 266   }
 267
 268   /// Flags that modify the behavior of advance().
 269   enum {
 270     /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
 271     /// not automatically pop the block scope when the end of a block is
 272     /// reached.
 273     AF_DontPopBlockAtEnd = 1,
 274
 275     /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
 276     /// returned just like normal records.
 277     AF_DontAutoprocessAbbrevs = 2
 278   };
 279
 280   /// advance - Advance the current bitstream, returning the next entry in the
 281   /// stream.
 282   BitstreamEntry advance(unsigned Flags = 0) {
 283     while (1) {
 284       unsigned Code = ReadCode();
 285       if (Code == bitc::END_BLOCK) {
 286         // Pop the end of the block unless Flags tells us not to.
 287         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
 288           return BitstreamEntry::getError();
 289         return BitstreamEntry::getEndBlock();
 290       }
 291
 292       if (Code == bitc::ENTER_SUBBLOCK)
 293         return BitstreamEntry::getSubBlock(ReadSubBlockID());
 294
 295       if (Code == bitc::DEFINE_ABBREV &&
 296           !(Flags & AF_DontAutoprocessAbbrevs)) {
 297         // We read and accumulate abbrev's, the client can't do anything with
 298         // them anyway.
 299         ReadAbbrevRecord();
 300         continue;
 301       }
 302
 303       return BitstreamEntry::getRecord(Code);
 304     }
 305   }
 306
 307   /// advanceSkippingSubblocks - This is a convenience function for clients that
 308   /// don't expect any subblocks.  This just skips over them automatically.
 309   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
 310     while (1) {
 311       // If we found a normal entry, return it.
 312       BitstreamEntry Entry = advance(Flags);
 313       if (Entry.Kind != BitstreamEntry::SubBlock)
 314         return Entry;
 315
 316       // If we found a sub-block, just skip over it and check the next entry.
 317       if (SkipBlock())
 318         return BitstreamEntry::getError();
 319     }
 320   }
 321
 322   /// JumpToBit - Reset the stream to the specified bit number.
 323   void JumpToBit(uint64_t BitNo) {
 324     uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
 325     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
 326     assert(canSkipToPos(ByteNo) && "Invalid location");
 327
 328     // Move the cursor to the right word.
 329     NextChar = ByteNo;
 330     BitsInCurWord = 0;
 331     CurWord = 0;
 332
 333     // Skip over any bits that are already consumed.
 334     if (WordBitNo) {
 335       if (sizeof(word_t) > 4)
 336         Read64(WordBitNo);
 337       else
 338         Read(WordBitNo);
 339     }
 340   }
 341
 342
 343   uint32_t Read(unsigned NumBits) {
 344     assert(NumBits && NumBits <= 32 &&
 345            "Cannot return zero or more than 32 bits!");
 346
 347     // If the field is fully contained by CurWord, return it quickly.
 348     if (BitsInCurWord >= NumBits) {
 349       uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
 350       CurWord >>= NumBits;
 351       BitsInCurWord -= NumBits;
 352       return R;
 353     }
 354
 355     // If we run out of data, stop at the end of the stream.
 356     if (isEndPos(NextChar)) {
 357       CurWord = 0;
 358       BitsInCurWord = 0;
 359       return 0;
 360     }
 361
 362     uint32_t R = uint32_t(CurWord);
 363
 364     // Read the next word from the stream.
 365     uint8_t Array[sizeof(word_t)] = {0};
 366
 367     BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array);
 368
 369     // Handle big-endian byte-swapping if necessary.
 370     support::detail::packed_endian_specific_integral
 371       <word_t, support::little, support::unaligned> EndianValue;
 372     memcpy(&EndianValue, Array, sizeof(Array));
 373
 374     CurWord = EndianValue;
 375
 376     NextChar += sizeof(word_t);
 377
 378     // Extract NumBits-BitsInCurWord from what we just read.
 379     unsigned BitsLeft = NumBits-BitsInCurWord;
 380
 381     // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
 382     R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
 383                     << BitsInCurWord);
 384
 385     // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
 386     // range [1..32]/[1..64] so be careful how we shift.
 387     if (BitsLeft != sizeof(word_t)*8)
 388       CurWord >>= BitsLeft;
 389     else
 390       CurWord = 0;
 391     BitsInCurWord = sizeof(word_t)*8-BitsLeft;
 392     return R;
 393   }
 394
 395   uint64_t Read64(unsigned NumBits) {
 396     if (NumBits <= 32) return Read(NumBits);
 397
 398     uint64_t V = Read(32);
 399     return V | (uint64_t)Read(NumBits-32) << 32;
 400   }
 401
 402   uint32_t ReadVBR(unsigned NumBits) {
 403     uint32_t Piece = Read(NumBits);
 404     if ((Piece & (1U << (NumBits-1))) == 0)
 405       return Piece;
 406
 407     uint32_t Result = 0;
 408     unsigned NextBit = 0;
 409     while (1) {
 410       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
 411
 412       if ((Piece & (1U << (NumBits-1))) == 0)
 413         return Result;
 414
 415       NextBit += NumBits-1;
 416       Piece = Read(NumBits);
 417     }
 418   }
 419
 420   // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
 421   // chunk size of the VBR must still be <= 32 bits though.
 422   uint64_t ReadVBR64(unsigned NumBits) {
 423     uint32_t Piece = Read(NumBits);
 424     if ((Piece & (1U << (NumBits-1))) == 0)
 425       return uint64_t(Piece);
 426
 427     uint64_t Result = 0;
 428     unsigned NextBit = 0;
 429     while (1) {
 430       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
 431
 432       if ((Piece & (1U << (NumBits-1))) == 0)
 433         return Result;
 434
 435       NextBit += NumBits-1;
 436       Piece = Read(NumBits);
 437     }
 438   }
 439
 440 private:
 441   void SkipToFourByteBoundary() {
 442     // If word_t is 64-bits and if we've read less than 32 bits, just dump
 443     // the bits we have up to the next 32-bit boundary.
 444     if (sizeof(word_t) > 4 &&
 445         BitsInCurWord >= 32) {
 446       CurWord >>= BitsInCurWord-32;
 447       BitsInCurWord = 32;
 448       return;
 449     }
 450
 451     BitsInCurWord = 0;
 452     CurWord = 0;
 453   }
 454 public:
 455
 456   unsigned ReadCode() {
 457     return Read(CurCodeSize);
 458   }
 459
 460
 461   // Block header:
 462   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 463
 464   /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
 465   /// the block.
 466   unsigned ReadSubBlockID() {
 467     return ReadVBR(bitc::BlockIDWidth);
 468   }
 469
 470   /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
 471   /// over the body of this block.  If the block record is malformed, return
 472   /// true.
 473   bool SkipBlock() {
 474     // Read and ignore the codelen value.  Since we are skipping this block, we
 475     // don't care what code widths are used inside of it.
 476     ReadVBR(bitc::CodeLenWidth);
 477     SkipToFourByteBoundary();
 478     unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
 479
 480     // Check that the block wasn't partially defined, and that the offset isn't
 481     // bogus.
 482     size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
 483     if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
 484       return true;
 485
 486     JumpToBit(SkipTo);
 487     return false;
 488   }
 489
 490   /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
 491   /// the block, and return true if the block has an error.
 492   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
 493
 494   bool ReadBlockEnd() {
 495     if (BlockScope.empty()) return true;
 496
 497     // Block tail:
 498     //    [END_BLOCK, <align4bytes>]
 499     SkipToFourByteBoundary();
 500
 501     popBlockScope();
 502     return false;
 503   }
 504
 505 private:
 506
 507   void popBlockScope() {
 508     CurCodeSize = BlockScope.back().PrevCodeSize;
 509
 510     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
 511     BlockScope.pop_back();
 512   }
 513
 514   //===--------------------------------------------------------------------===//
 515   // Record Processing
 516   //===--------------------------------------------------------------------===//
 517
 518 private:
 519   void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
 520                               SmallVectorImpl<uint64_t> &Vals);
 521   void readAbbreviatedField(const BitCodeAbbrevOp &Op,
 522                             SmallVectorImpl<uint64_t> &Vals);
 523   void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
 524
 525 public:
 526
 527   /// getAbbrev - Return the abbreviation for the specified AbbrevId.
 528   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
 529     unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
 530     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 531     return CurAbbrevs[AbbrevNo].get();
 532   }
 533
 534   /// skipRecord - Read the current record and discard it.
 535   void skipRecord(unsigned AbbrevID);
 536
 537   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
 538                       StringRef *Blob = nullptr);
 539
 540   //===--------------------------------------------------------------------===//
 541   // Abbrev Processing
 542   //===--------------------------------------------------------------------===//
 543   void ReadAbbrevRecord();
 544
 545   bool ReadBlockInfoBlock();
 546 };
 547
 548 } // End llvm namespace
 549
 550 #endif