include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H
  16 #define LLVM_BITCODE_BITSTREAMWRITER_H
  17
  18 #include "llvm/ADT/SmallVector.h"
  19 #include "llvm/ADT/StringRef.h"
  20 #include "llvm/Bitcode/BitCodes.h"
  21 #include "llvm/Support/Endian.h"
  22 #include <vector>
  23
  24 namespace llvm {
  25
  26 class BitstreamWriter {
  27   SmallVectorImpl<char> &Out;
  28
  29   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  30   unsigned CurBit;
  31
  32   /// CurValue - The current value.  Only bits < CurBit are valid.
  33   uint32_t CurValue;
  34
  35   /// CurCodeSize - This is the declared size of code values used for the
  36   /// current block, in bits.
  37   unsigned CurCodeSize;
  38
  39   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  40   /// selected BLOCK ID.
  41   unsigned BlockInfoCurBID;
  42
  43   /// CurAbbrevs - Abbrevs installed at in this block.
  44   std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
  45
  46   struct Block {
  47     unsigned PrevCodeSize;
  48     unsigned StartSizeWord;
  49     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
  50     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  51   };
  52
  53   /// BlockScope - This tracks the current blocks that we have entered.
  54   std::vector<Block> BlockScope;
  55
  56   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  57   /// These describe abbreviations that all blocks of the specified ID inherit.
  58   struct BlockInfo {
  59     unsigned BlockID;
  60     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
  61   };
  62   std::vector<BlockInfo> BlockInfoRecords;
  63
  64   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
  65   // value.
  66   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
  67     support::endian::write32le(&Out[ByteNo], NewWord);
  68   }
  69
  70   void WriteByte(unsigned char Value) {
  71     Out.push_back(Value);
  72   }
  73
  74   void WriteWord(unsigned Value) {
  75     Value = support::endian::byte_swap<uint32_t, support::little>(Value);
  76     Out.append(reinterpret_cast<const char *>(&Value),
  77                reinterpret_cast<const char *>(&Value + 1));
  78   }
  79
  80   unsigned GetBufferOffset() const {
  81     return Out.size();
  82   }
  83
  84   unsigned GetWordIndex() const {
  85     unsigned Offset = GetBufferOffset();
  86     assert((Offset & 3) == 0 && "Not 32-bit aligned");
  87     return Offset / 4;
  88   }
  89
  90 public:
  91   explicit BitstreamWriter(SmallVectorImpl<char> &O)
  92     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  93
  94   ~BitstreamWriter() {
  95     assert(CurBit == 0 && "Unflushed data remaining");
  96     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
  97   }
  98
  99   /// \brief Retrieve the current position in the stream, in bits.
 100   uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
 101
 102   //===--------------------------------------------------------------------===//
 103   // Basic Primitives for emitting bits to the stream.
 104   //===--------------------------------------------------------------------===//
 105
 106   void Emit(uint32_t Val, unsigned NumBits) {
 107     assert(NumBits && NumBits <= 32 && "Invalid value size!");
 108     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
 109     CurValue |= Val << CurBit;
 110     if (CurBit + NumBits < 32) {
 111       CurBit += NumBits;
 112       return;
 113     }
 114
 115     // Add the current word.
 116     WriteWord(CurValue);
 117
 118     if (CurBit)
 119       CurValue = Val >> (32-CurBit);
 120     else
 121       CurValue = 0;
 122     CurBit = (CurBit+NumBits) & 31;
 123   }
 124
 125   void Emit64(uint64_t Val, unsigned NumBits) {
 126     if (NumBits <= 32)
 127       Emit((uint32_t)Val, NumBits);
 128     else {
 129       Emit((uint32_t)Val, 32);
 130       Emit((uint32_t)(Val >> 32), NumBits-32);
 131     }
 132   }
 133
 134   void FlushToWord() {
 135     if (CurBit) {
 136       WriteWord(CurValue);
 137       CurBit = 0;
 138       CurValue = 0;
 139     }
 140   }
 141
 142   void EmitVBR(uint32_t Val, unsigned NumBits) {
 143     assert(NumBits <= 32 && "Too many bits to emit!");
 144     uint32_t Threshold = 1U << (NumBits-1);
 145
 146     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 147     while (Val >= Threshold) {
 148       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 149       Val >>= NumBits-1;
 150     }
 151
 152     Emit(Val, NumBits);
 153   }
 154
 155   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 156     assert(NumBits <= 32 && "Too many bits to emit!");
 157     if ((uint32_t)Val == Val)
 158       return EmitVBR((uint32_t)Val, NumBits);
 159
 160     uint32_t Threshold = 1U << (NumBits-1);
 161
 162     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 163     while (Val >= Threshold) {
 164       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 165            (1 << (NumBits-1)), NumBits);
 166       Val >>= NumBits-1;
 167     }
 168
 169     Emit((uint32_t)Val, NumBits);
 170   }
 171
 172   /// EmitCode - Emit the specified code.
 173   void EmitCode(unsigned Val) {
 174     Emit(Val, CurCodeSize);
 175   }
 176
 177   //===--------------------------------------------------------------------===//
 178   // Block Manipulation
 179   //===--------------------------------------------------------------------===//
 180
 181   /// getBlockInfo - If there is block info for the specified ID, return it,
 182   /// otherwise return null.
 183   BlockInfo *getBlockInfo(unsigned BlockID) {
 184     // Common case, the most recent entry matches BlockID.
 185     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 186       return &BlockInfoRecords.back();
 187
 188     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 189          i != e; ++i)
 190       if (BlockInfoRecords[i].BlockID == BlockID)
 191         return &BlockInfoRecords[i];
 192     return nullptr;
 193   }
 194
 195   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 196     // Block header:
 197     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 198     EmitCode(bitc::ENTER_SUBBLOCK);
 199     EmitVBR(BlockID, bitc::BlockIDWidth);
 200     EmitVBR(CodeLen, bitc::CodeLenWidth);
 201     FlushToWord();
 202
 203     unsigned BlockSizeWordIndex = GetWordIndex();
 204     unsigned OldCodeSize = CurCodeSize;
 205
 206     // Emit a placeholder, which will be replaced when the block is popped.
 207     Emit(0, bitc::BlockSizeWidth);
 208
 209     CurCodeSize = CodeLen;
 210
 211     // Push the outer block's abbrev set onto the stack, start out with an
 212     // empty abbrev set.
 213     BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex);
 214     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 215
 216     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 217     // to the abbrev list.
 218     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 219       CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
 220                         Info->Abbrevs.end());
 221     }
 222   }
 223
 224   void ExitBlock() {
 225     assert(!BlockScope.empty() && "Block scope imbalance!");
 226     const Block &B = BlockScope.back();
 227
 228     // Block tail:
 229     //    [END_BLOCK, <align4bytes>]
 230     EmitCode(bitc::END_BLOCK);
 231     FlushToWord();
 232
 233     // Compute the size of the block, in words, not counting the size field.
 234     unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
 235     unsigned ByteNo = B.StartSizeWord*4;
 236
 237     // Update the block size field in the header of this sub-block.
 238     BackpatchWord(ByteNo, SizeInWords);
 239
 240     // Restore the inner block's code size and abbrev table.
 241     CurCodeSize = B.PrevCodeSize;
 242     CurAbbrevs = std::move(B.PrevAbbrevs);
 243     BlockScope.pop_back();
 244   }
 245
 246   //===--------------------------------------------------------------------===//
 247   // Record Emission
 248   //===--------------------------------------------------------------------===//
 249
 250 private:
 251   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 252   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 253   template<typename uintty>
 254   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 255     assert(Op.isLiteral() && "Not a literal");
 256     // If the abbrev specifies the literal value to use, don't emit
 257     // anything.
 258     assert(V == Op.getLiteralValue() &&
 259            "Invalid abbrev for record!");
 260   }
 261
 262   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 263   /// encoding.
 264   template<typename uintty>
 265   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 266     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 267
 268     // Encode the value as we are commanded.
 269     switch (Op.getEncoding()) {
 270     default: llvm_unreachable("Unknown encoding!");
 271     case BitCodeAbbrevOp::Fixed:
 272       if (Op.getEncodingData())
 273         Emit((unsigned)V, (unsigned)Op.getEncodingData());
 274       break;
 275     case BitCodeAbbrevOp::VBR:
 276       if (Op.getEncodingData())
 277         EmitVBR64(V, (unsigned)Op.getEncodingData());
 278       break;
 279     case BitCodeAbbrevOp::Char6:
 280       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 281       break;
 282     }
 283   }
 284
 285   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 286   /// emission code.  If BlobData is non-null, then it specifies an array of
 287   /// data that should be emitted as part of the Blob or Array operand that is
 288   /// known to exist at the end of the record.
 289   template<typename uintty>
 290   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 291                                 StringRef Blob) {
 292     const char *BlobData = Blob.data();
 293     unsigned BlobLen = (unsigned) Blob.size();
 294     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 295     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 296     const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get();
 297
 298     EmitCode(Abbrev);
 299
 300     unsigned RecordIdx = 0;
 301     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 302          i != e; ++i) {
 303       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 304       if (Op.isLiteral()) {
 305         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 306         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 307         ++RecordIdx;
 308       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 309         // Array case.
 310         assert(i+2 == e && "array op not second to last?");
 311         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 312
 313         // If this record has blob data, emit it, otherwise we must have record
 314         // entries to encode this way.
 315         if (BlobData) {
 316           assert(RecordIdx == Vals.size() &&
 317                  "Blob data and record entries specified for array!");
 318           // Emit a vbr6 to indicate the number of elements present.
 319           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 320
 321           // Emit each field.
 322           for (unsigned i = 0; i != BlobLen; ++i)
 323             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 324
 325           // Know that blob data is consumed for assertion below.
 326           BlobData = nullptr;
 327         } else {
 328           // Emit a vbr6 to indicate the number of elements present.
 329           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 330
 331           // Emit each field.
 332           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 333             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 334         }
 335       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 336         // If this record has blob data, emit it, otherwise we must have record
 337         // entries to encode this way.
 338
 339         // Emit a vbr6 to indicate the number of elements present.
 340         if (BlobData) {
 341           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 342           assert(RecordIdx == Vals.size() &&
 343                  "Blob data and record entries specified for blob operand!");
 344         } else {
 345           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 346         }
 347
 348         // Flush to a 32-bit alignment boundary.
 349         FlushToWord();
 350
 351         // Emit each field as a literal byte.
 352         if (BlobData) {
 353           for (unsigned i = 0; i != BlobLen; ++i)
 354             WriteByte((unsigned char)BlobData[i]);
 355
 356           // Know that blob data is consumed for assertion below.
 357           BlobData = nullptr;
 358         } else {
 359           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 360             assert(isUInt<8>(Vals[RecordIdx]) &&
 361                    "Value too large to emit as blob");
 362             WriteByte((unsigned char)Vals[RecordIdx]);
 363           }
 364         }
 365
 366         // Align end to 32-bits.
 367         while (GetBufferOffset() & 3)
 368           WriteByte(0);
 369       } else {  // Single scalar field.
 370         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 371         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 372         ++RecordIdx;
 373       }
 374     }
 375     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 376     assert(BlobData == nullptr &&
 377            "Blob data specified for record that doesn't use it!");
 378   }
 379
 380 public:
 381
 382   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 383   /// we have one to compress the output.
 384   template<typename uintty>
 385   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 386                   unsigned Abbrev = 0) {
 387     if (!Abbrev) {
 388       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 389       // form.
 390       EmitCode(bitc::UNABBREV_RECORD);
 391       EmitVBR(Code, 6);
 392       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 393       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 394         EmitVBR64(Vals[i], 6);
 395       return;
 396     }
 397
 398     // Insert the code into Vals to treat it uniformly.
 399     Vals.insert(Vals.begin(), Code);
 400
 401     EmitRecordWithAbbrev(Abbrev, Vals);
 402   }
 403
 404   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 405   /// Unlike EmitRecord, the code for the record should be included in Vals as
 406   /// the first entry.
 407   template<typename uintty>
 408   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 409     EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
 410   }
 411
 412   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 413   /// abbrev that includes a blob at the end.  The blob data to emit is
 414   /// specified by the pointer and length specified at the end.  In contrast to
 415   /// EmitRecord, this routine expects that the first entry in Vals is the code
 416   /// of the record.
 417   template<typename uintty>
 418   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 419                           StringRef Blob) {
 420     EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
 421   }
 422   template<typename uintty>
 423   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 424                           const char *BlobData, unsigned BlobLen) {
 425     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
 426   }
 427
 428   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 429   /// that end with an array.
 430   template<typename uintty>
 431   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 432                           StringRef Array) {
 433     EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
 434   }
 435   template<typename uintty>
 436   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 437                           const char *ArrayData, unsigned ArrayLen) {
 438     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
 439                                                             ArrayLen));
 440   }
 441
 442   //===--------------------------------------------------------------------===//
 443   // Abbrev Emission
 444   //===--------------------------------------------------------------------===//
 445
 446 private:
 447   // Emit the abbreviation as a DEFINE_ABBREV record.
 448   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 449     EmitCode(bitc::DEFINE_ABBREV);
 450     EmitVBR(Abbv->getNumOperandInfos(), 5);
 451     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 452          i != e; ++i) {
 453       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 454       Emit(Op.isLiteral(), 1);
 455       if (Op.isLiteral()) {
 456         EmitVBR64(Op.getLiteralValue(), 8);
 457       } else {
 458         Emit(Op.getEncoding(), 3);
 459         if (Op.hasEncodingData())
 460           EmitVBR64(Op.getEncodingData(), 5);
 461       }
 462     }
 463   }
 464 public:
 465
 466   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 467   /// method takes ownership of the specified abbrev.
 468   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 469     // Emit the abbreviation as a record.
 470     EncodeAbbrev(Abbv);
 471     CurAbbrevs.push_back(Abbv);
 472     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 473       bitc::FIRST_APPLICATION_ABBREV;
 474   }
 475
 476   //===--------------------------------------------------------------------===//
 477   // BlockInfo Block Emission
 478   //===--------------------------------------------------------------------===//
 479
 480   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 481   void EnterBlockInfoBlock(unsigned CodeWidth) {
 482     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 483     BlockInfoCurBID = ~0U;
 484   }
 485 private:
 486   /// SwitchToBlockID - If we aren't already talking about the specified block
 487   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 488   void SwitchToBlockID(unsigned BlockID) {
 489     if (BlockInfoCurBID == BlockID) return;
 490     SmallVector<unsigned, 2> V;
 491     V.push_back(BlockID);
 492     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 493     BlockInfoCurBID = BlockID;
 494   }
 495
 496   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 497     if (BlockInfo *BI = getBlockInfo(BlockID))
 498       return *BI;
 499
 500     // Otherwise, add a new record.
 501     BlockInfoRecords.emplace_back();
 502     BlockInfoRecords.back().BlockID = BlockID;
 503     return BlockInfoRecords.back();
 504   }
 505
 506 public:
 507
 508   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 509   /// BlockID.
 510   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 511     SwitchToBlockID(BlockID);
 512     EncodeAbbrev(Abbv);
 513
 514     // Add the abbrev to the specified block record.
 515     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 516     Info.Abbrevs.push_back(Abbv);
 517
 518     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 519   }
 520 };
 521
 522
 523 } // End llvm namespace
 524
 525 #endif