include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H
  16 #define LLVM_BITCODE_BITSTREAMWRITER_H
  17
  18 #include "llvm/ADT/SmallVector.h"
  19 #include "llvm/ADT/StringRef.h"
  20 #include "llvm/Bitcode/BitCodes.h"
  21 #include <vector>
  22
  23 namespace llvm {
  24
  25 class BitstreamWriter {
  26   SmallVectorImpl<char> &Out;
  27
  28   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  29   unsigned CurBit;
  30
  31   /// CurValue - The current value.  Only bits < CurBit are valid.
  32   uint32_t CurValue;
  33
  34   /// CurCodeSize - This is the declared size of code values used for the
  35   /// current block, in bits.
  36   unsigned CurCodeSize;
  37
  38   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  39   /// selected BLOCK ID.
  40   unsigned BlockInfoCurBID;
  41
  42   /// CurAbbrevs - Abbrevs installed at in this block.
  43   std::vector<BitCodeAbbrev*> CurAbbrevs;
  44
  45   struct Block {
  46     unsigned PrevCodeSize;
  47     unsigned StartSizeWord;
  48     std::vector<BitCodeAbbrev*> PrevAbbrevs;
  49     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  50   };
  51
  52   /// BlockScope - This tracks the current blocks that we have entered.
  53   std::vector<Block> BlockScope;
  54
  55   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  56   /// These describe abbreviations that all blocks of the specified ID inherit.
  57   struct BlockInfo {
  58     unsigned BlockID;
  59     std::vector<BitCodeAbbrev*> Abbrevs;
  60   };
  61   std::vector<BlockInfo> BlockInfoRecords;
  62
  63   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
  64   // value.
  65   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
  66     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
  67     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
  68     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
  69     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
  70   }
  71
  72   void WriteByte(unsigned char Value) {
  73     Out.push_back(Value);
  74   }
  75
  76   void WriteWord(unsigned Value) {
  77     unsigned char Bytes[4] = {
  78       (unsigned char)(Value >>  0),
  79       (unsigned char)(Value >>  8),
  80       (unsigned char)(Value >> 16),
  81       (unsigned char)(Value >> 24) };
  82     Out.append(&Bytes[0], &Bytes[4]);
  83   }
  84
  85   unsigned GetBufferOffset() const {
  86     return Out.size();
  87   }
  88
  89   unsigned GetWordIndex() const {
  90     unsigned Offset = GetBufferOffset();
  91     assert((Offset & 3) == 0 && "Not 32-bit aligned");
  92     return Offset / 4;
  93   }
  94
  95 public:
  96   explicit BitstreamWriter(SmallVectorImpl<char> &O)
  97     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  98
  99   ~BitstreamWriter() {
 100     assert(CurBit == 0 && "Unflused data remaining");
 101     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
 102
 103     // Free the BlockInfoRecords.
 104     while (!BlockInfoRecords.empty()) {
 105       BlockInfo &Info = BlockInfoRecords.back();
 106       // Free blockinfo abbrev info.
 107       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
 108            i != e; ++i)
 109         Info.Abbrevs[i]->dropRef();
 110       BlockInfoRecords.pop_back();
 111     }
 112   }
 113
 114   /// \brief Retrieve the current position in the stream, in bits.
 115   uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
 116
 117   //===--------------------------------------------------------------------===//
 118   // Basic Primitives for emitting bits to the stream.
 119   //===--------------------------------------------------------------------===//
 120
 121   void Emit(uint32_t Val, unsigned NumBits) {
 122     assert(NumBits && NumBits <= 32 && "Invalid value size!");
 123     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
 124     CurValue |= Val << CurBit;
 125     if (CurBit + NumBits < 32) {
 126       CurBit += NumBits;
 127       return;
 128     }
 129
 130     // Add the current word.
 131     WriteWord(CurValue);
 132
 133     if (CurBit)
 134       CurValue = Val >> (32-CurBit);
 135     else
 136       CurValue = 0;
 137     CurBit = (CurBit+NumBits) & 31;
 138   }
 139
 140   void Emit64(uint64_t Val, unsigned NumBits) {
 141     if (NumBits <= 32)
 142       Emit((uint32_t)Val, NumBits);
 143     else {
 144       Emit((uint32_t)Val, 32);
 145       Emit((uint32_t)(Val >> 32), NumBits-32);
 146     }
 147   }
 148
 149   void FlushToWord() {
 150     if (CurBit) {
 151       WriteWord(CurValue);
 152       CurBit = 0;
 153       CurValue = 0;
 154     }
 155   }
 156
 157   void EmitVBR(uint32_t Val, unsigned NumBits) {
 158     assert(NumBits <= 32 && "Too many bits to emit!");
 159     uint32_t Threshold = 1U << (NumBits-1);
 160
 161     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 162     while (Val >= Threshold) {
 163       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 164       Val >>= NumBits-1;
 165     }
 166
 167     Emit(Val, NumBits);
 168   }
 169
 170   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 171     assert(NumBits <= 32 && "Too many bits to emit!");
 172     if ((uint32_t)Val == Val)
 173       return EmitVBR((uint32_t)Val, NumBits);
 174
 175     uint32_t Threshold = 1U << (NumBits-1);
 176
 177     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 178     while (Val >= Threshold) {
 179       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 180            (1 << (NumBits-1)), NumBits);
 181       Val >>= NumBits-1;
 182     }
 183
 184     Emit((uint32_t)Val, NumBits);
 185   }
 186
 187   /// EmitCode - Emit the specified code.
 188   void EmitCode(unsigned Val) {
 189     Emit(Val, CurCodeSize);
 190   }
 191
 192   //===--------------------------------------------------------------------===//
 193   // Block Manipulation
 194   //===--------------------------------------------------------------------===//
 195
 196   /// getBlockInfo - If there is block info for the specified ID, return it,
 197   /// otherwise return null.
 198   BlockInfo *getBlockInfo(unsigned BlockID) {
 199     // Common case, the most recent entry matches BlockID.
 200     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 201       return &BlockInfoRecords.back();
 202
 203     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 204          i != e; ++i)
 205       if (BlockInfoRecords[i].BlockID == BlockID)
 206         return &BlockInfoRecords[i];
 207     return 0;
 208   }
 209
 210   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 211     // Block header:
 212     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 213     EmitCode(bitc::ENTER_SUBBLOCK);
 214     EmitVBR(BlockID, bitc::BlockIDWidth);
 215     EmitVBR(CodeLen, bitc::CodeLenWidth);
 216     FlushToWord();
 217
 218     unsigned BlockSizeWordIndex = GetWordIndex();
 219     unsigned OldCodeSize = CurCodeSize;
 220
 221     // Emit a placeholder, which will be replaced when the block is popped.
 222     Emit(0, bitc::BlockSizeWidth);
 223
 224     CurCodeSize = CodeLen;
 225
 226     // Push the outer block's abbrev set onto the stack, start out with an
 227     // empty abbrev set.
 228     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex));
 229     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 230
 231     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 232     // to the abbrev list.
 233     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 234       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
 235            i != e; ++i) {
 236         CurAbbrevs.push_back(Info->Abbrevs[i]);
 237         Info->Abbrevs[i]->addRef();
 238       }
 239     }
 240   }
 241
 242   void ExitBlock() {
 243     assert(!BlockScope.empty() && "Block scope imbalance!");
 244
 245     // Delete all abbrevs.
 246     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
 247          i != e; ++i)
 248       CurAbbrevs[i]->dropRef();
 249
 250     const Block &B = BlockScope.back();
 251
 252     // Block tail:
 253     //    [END_BLOCK, <align4bytes>]
 254     EmitCode(bitc::END_BLOCK);
 255     FlushToWord();
 256
 257     // Compute the size of the block, in words, not counting the size field.
 258     unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
 259     unsigned ByteNo = B.StartSizeWord*4;
 260
 261     // Update the block size field in the header of this sub-block.
 262     BackpatchWord(ByteNo, SizeInWords);
 263
 264     // Restore the inner block's code size and abbrev table.
 265     CurCodeSize = B.PrevCodeSize;
 266     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 267     BlockScope.pop_back();
 268   }
 269
 270   //===--------------------------------------------------------------------===//
 271   // Record Emission
 272   //===--------------------------------------------------------------------===//
 273
 274 private:
 275   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 276   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 277   template<typename uintty>
 278   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 279     assert(Op.isLiteral() && "Not a literal");
 280     // If the abbrev specifies the literal value to use, don't emit
 281     // anything.
 282     assert(V == Op.getLiteralValue() &&
 283            "Invalid abbrev for record!");
 284   }
 285
 286   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 287   /// encoding.
 288   template<typename uintty>
 289   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 290     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 291
 292     // Encode the value as we are commanded.
 293     switch (Op.getEncoding()) {
 294     default: llvm_unreachable("Unknown encoding!");
 295     case BitCodeAbbrevOp::Fixed:
 296       if (Op.getEncodingData())
 297         Emit((unsigned)V, (unsigned)Op.getEncodingData());
 298       break;
 299     case BitCodeAbbrevOp::VBR:
 300       if (Op.getEncodingData())
 301         EmitVBR64(V, (unsigned)Op.getEncodingData());
 302       break;
 303     case BitCodeAbbrevOp::Char6:
 304       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 305       break;
 306     }
 307   }
 308
 309   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 310   /// emission code.  If BlobData is non-null, then it specifies an array of
 311   /// data that should be emitted as part of the Blob or Array operand that is
 312   /// known to exist at the end of the record.
 313   template<typename uintty>
 314   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 315                                 StringRef Blob) {
 316     const char *BlobData = Blob.data();
 317     unsigned BlobLen = (unsigned) Blob.size();
 318     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 319     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 320     BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
 321
 322     EmitCode(Abbrev);
 323
 324     unsigned RecordIdx = 0;
 325     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 326          i != e; ++i) {
 327       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 328       if (Op.isLiteral()) {
 329         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 330         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 331         ++RecordIdx;
 332       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 333         // Array case.
 334         assert(i+2 == e && "array op not second to last?");
 335         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 336
 337         // If this record has blob data, emit it, otherwise we must have record
 338         // entries to encode this way.
 339         if (BlobData) {
 340           assert(RecordIdx == Vals.size() &&
 341                  "Blob data and record entries specified for array!");
 342           // Emit a vbr6 to indicate the number of elements present.
 343           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 344
 345           // Emit each field.
 346           for (unsigned i = 0; i != BlobLen; ++i)
 347             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 348
 349           // Know that blob data is consumed for assertion below.
 350           BlobData = 0;
 351         } else {
 352           // Emit a vbr6 to indicate the number of elements present.
 353           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 354
 355           // Emit each field.
 356           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 357             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 358         }
 359       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 360         // If this record has blob data, emit it, otherwise we must have record
 361         // entries to encode this way.
 362
 363         // Emit a vbr6 to indicate the number of elements present.
 364         if (BlobData) {
 365           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 366           assert(RecordIdx == Vals.size() &&
 367                  "Blob data and record entries specified for blob operand!");
 368         } else {
 369           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 370         }
 371
 372         // Flush to a 32-bit alignment boundary.
 373         FlushToWord();
 374
 375         // Emit each field as a literal byte.
 376         if (BlobData) {
 377           for (unsigned i = 0; i != BlobLen; ++i)
 378             WriteByte((unsigned char)BlobData[i]);
 379
 380           // Know that blob data is consumed for assertion below.
 381           BlobData = 0;
 382         } else {
 383           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 384             assert(isUInt<8>(Vals[RecordIdx]) &&
 385                    "Value too large to emit as blob");
 386             WriteByte((unsigned char)Vals[RecordIdx]);
 387           }
 388         }
 389
 390         // Align end to 32-bits.
 391         while (GetBufferOffset() & 3)
 392           WriteByte(0);
 393       } else {  // Single scalar field.
 394         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 395         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 396         ++RecordIdx;
 397       }
 398     }
 399     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 400     assert(BlobData == 0 &&
 401            "Blob data specified for record that doesn't use it!");
 402   }
 403
 404 public:
 405
 406   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 407   /// we have one to compress the output.
 408   template<typename uintty>
 409   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 410                   unsigned Abbrev = 0) {
 411     if (!Abbrev) {
 412       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 413       // form.
 414       EmitCode(bitc::UNABBREV_RECORD);
 415       EmitVBR(Code, 6);
 416       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 417       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 418         EmitVBR64(Vals[i], 6);
 419       return;
 420     }
 421
 422     // Insert the code into Vals to treat it uniformly.
 423     Vals.insert(Vals.begin(), Code);
 424
 425     EmitRecordWithAbbrev(Abbrev, Vals);
 426   }
 427
 428   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 429   /// Unlike EmitRecord, the code for the record should be included in Vals as
 430   /// the first entry.
 431   template<typename uintty>
 432   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 433     EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
 434   }
 435
 436   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 437   /// abbrev that includes a blob at the end.  The blob data to emit is
 438   /// specified by the pointer and length specified at the end.  In contrast to
 439   /// EmitRecord, this routine expects that the first entry in Vals is the code
 440   /// of the record.
 441   template<typename uintty>
 442   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 443                           StringRef Blob) {
 444     EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
 445   }
 446   template<typename uintty>
 447   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 448                           const char *BlobData, unsigned BlobLen) {
 449     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
 450   }
 451
 452   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 453   /// that end with an array.
 454   template<typename uintty>
 455   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 456                           StringRef Array) {
 457     EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
 458   }
 459   template<typename uintty>
 460   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 461                           const char *ArrayData, unsigned ArrayLen) {
 462     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
 463                                                             ArrayLen));
 464   }
 465
 466   //===--------------------------------------------------------------------===//
 467   // Abbrev Emission
 468   //===--------------------------------------------------------------------===//
 469
 470 private:
 471   // Emit the abbreviation as a DEFINE_ABBREV record.
 472   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 473     EmitCode(bitc::DEFINE_ABBREV);
 474     EmitVBR(Abbv->getNumOperandInfos(), 5);
 475     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 476          i != e; ++i) {
 477       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 478       Emit(Op.isLiteral(), 1);
 479       if (Op.isLiteral()) {
 480         EmitVBR64(Op.getLiteralValue(), 8);
 481       } else {
 482         Emit(Op.getEncoding(), 3);
 483         if (Op.hasEncodingData())
 484           EmitVBR64(Op.getEncodingData(), 5);
 485       }
 486     }
 487   }
 488 public:
 489
 490   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 491   /// method takes ownership of the specified abbrev.
 492   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 493     // Emit the abbreviation as a record.
 494     EncodeAbbrev(Abbv);
 495     CurAbbrevs.push_back(Abbv);
 496     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 497       bitc::FIRST_APPLICATION_ABBREV;
 498   }
 499
 500   //===--------------------------------------------------------------------===//
 501   // BlockInfo Block Emission
 502   //===--------------------------------------------------------------------===//
 503
 504   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 505   void EnterBlockInfoBlock(unsigned CodeWidth) {
 506     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 507     BlockInfoCurBID = ~0U;
 508   }
 509 private:
 510   /// SwitchToBlockID - If we aren't already talking about the specified block
 511   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 512   void SwitchToBlockID(unsigned BlockID) {
 513     if (BlockInfoCurBID == BlockID) return;
 514     SmallVector<unsigned, 2> V;
 515     V.push_back(BlockID);
 516     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 517     BlockInfoCurBID = BlockID;
 518   }
 519
 520   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 521     if (BlockInfo *BI = getBlockInfo(BlockID))
 522       return *BI;
 523
 524     // Otherwise, add a new record.
 525     BlockInfoRecords.push_back(BlockInfo());
 526     BlockInfoRecords.back().BlockID = BlockID;
 527     return BlockInfoRecords.back();
 528   }
 529
 530 public:
 531
 532   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 533   /// BlockID.
 534   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 535     SwitchToBlockID(BlockID);
 536     EncodeAbbrev(Abbv);
 537
 538     // Add the abbrev to the specified block record.
 539     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 540     Info.Abbrevs.push_back(Abbv);
 541
 542     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 543   }
 544 };
 545
 546
 547 } // End llvm namespace
 548
 549 #endif