BitstreamCursor Stream;
uint64_t NextUnreadBit = 0;
bool SeenValueSymbolTable = false;
+ unsigned VSTOffset = 0;
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
std::error_code parseTypeTable();
std::error_code parseTypeTableBody();
- std::error_code parseValueSymbolTable();
+ ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT);
+ std::error_code parseValueSymbolTable(unsigned Offset = 0);
std::error_code parseConstants();
std::error_code rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
}
}
-std::error_code BitcodeReader::parseValueSymbolTable() {
+/// Associate a value with its name from the given index in the provided record.
+ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT) {
+ SmallString<128> ValueName;
+ if (convertToString(Record, NameIndex, ValueName))
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size() || !ValueList[ValueID])
+ return error("Invalid record");
+ Value *V = ValueList[ValueID];
+
+ V->setName(StringRef(ValueName.data(), ValueName.size()));
+ auto *GO = dyn_cast<GlobalObject>(V);
+ if (GO) {
+ if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
+ if (TT.isOSBinFormatMachO())
+ GO->setComdat(nullptr);
+ else
+ GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
+ }
+ }
+ return V;
+}
+
+/// Parse the value symbol table at either the current parsing location or
+/// at the given bit offset if provided.
+std::error_code BitcodeReader::parseValueSymbolTable(unsigned Offset) {
+ uint64_t CurrentBit;
+ // Pass in the Offset to distinguish between calling for the module-level
+ // VST (where we want to jump to the VST offset) and the function-level
+ // VST (where we don't).
+ if (Offset > 0) {
+ // Save the current parsing location so we can jump back at the end
+ // of the VST read.
+ CurrentBit = Stream.GetCurrentBitNo();
+ Stream.JumpToBit(Offset * 32);
+ BitstreamEntry Entry = Stream.advance();
+ assert(Entry.Kind == BitstreamEntry::SubBlock);
+ assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
+ }
+
+ // Compute the delta between the bitcode indices in the VST (the word offset
+ // to the word-aligned ENTER_SUBBLOCK for the function block, and that
+ // expected by the lazy reader. The reader's EnterSubBlock expects to have
+ // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID
+ // (size BlockIDWidth). Note that we access the stream's AbbrevID width here
+ // just before entering the VST subblock because: 1) the EnterSubBlock
+ // changes the AbbrevID width; 2) the VST block is nested within the same
+ // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same
+ // AbbrevID width before calling EnterSubBlock; and 3) when we want to
+ // jump to the FUNCTION_BLOCK using this offset later, we don't want
+ // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK.
+ unsigned FuncBitcodeOffsetDelta =
+ Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
+
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return error("Invalid record");
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
+ if (Offset > 0)
+ Stream.JumpToBit(CurrentBit);
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
- if (convertToString(Record, 1, ValueName))
- return error("Invalid record");
- unsigned ValueID = Record[0];
- if (ValueID >= ValueList.size() || !ValueList[ValueID])
- return error("Invalid record");
- Value *V = ValueList[ValueID];
-
- V->setName(StringRef(ValueName.data(), ValueName.size()));
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
- if (TT.isOSBinFormatMachO())
- GO->setComdat(nullptr);
- else
- GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
- }
+ ErrorOr<Value *> ValOrErr = recordValue(Record, 1, TT);
+ if (std::error_code EC = ValOrErr.getError())
+ return EC;
+ ValOrErr.get();
+ break;
+ }
+ case bitc::VST_CODE_FNENTRY: {
+ // VST_FNENTRY: [valueid, offset, namechar x N]
+ ErrorOr<Value *> ValOrErr = recordValue(Record, 2, TT);
+ if (std::error_code EC = ValOrErr.getError())
+ return EC;
+ Value *V = ValOrErr.get();
+
+ auto *GO = dyn_cast<GlobalObject>(V);
+ if (!GO) {
+ // If this is an alias, need to get the actual Function object
+ // it aliases, in order to set up the DeferredFunctionInfo entry below.
+ auto *GA = dyn_cast<GlobalAlias>(V);
+ if (GA)
+ GO = GA->getBaseObject();
+ assert(GO);
}
- ValueName.clear();
+
+ uint64_t FuncWordOffset = Record[1];
+ Function *F = dyn_cast<Function>(GO);
+ assert(F);
+ uint64_t FuncBitOffset = FuncWordOffset * 32;
+ DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
+ // Set the NextUnreadBit to point to the last function block.
+ // Later when parsing is resumed after function materialization,
+ // we can simply skip that last function block.
+ if (FuncBitOffset > NextUnreadBit)
+ NextUnreadBit = FuncBitOffset;
break;
}
case bitc::VST_CODE_BBENTRY: {
return EC;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (std::error_code EC = parseValueSymbolTable())
- return EC;
- SeenValueSymbolTable = true;
+ if (!SeenValueSymbolTable) {
+ // Either this is an old form VST without function index and an
+ // associated VST forward declaration record (which would have caused
+ // the VST to be jumped to and parsed before it was encountered
+ // normally in the stream), or there were no function blocks to
+ // trigger an earlier parsing of the VST.
+ assert(VSTOffset == 0 || FunctionsWithBodies.empty());
+ if (std::error_code EC = parseValueSymbolTable())
+ return EC;
+ SeenValueSymbolTable = true;
+ } else {
+ // We must have had a VST forward declaration record, which caused
+ // the parser to jump to and parse the VST earlier.
+ assert(VSTOffset > 0);
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ }
break;
case bitc::CONSTANTS_BLOCK_ID:
if (std::error_code EC = parseConstants())
SeenFirstFunctionBody = true;
}
+ if (VSTOffset > 0) {
+ // If we have a VST forward declaration record, make sure we
+ // parse the VST now if we haven't already. It is needed to
+ // set up the DeferredFunctionInfo vector for lazy reading.
+ if (!SeenValueSymbolTable) {
+ if (std::error_code EC =
+ BitcodeReader::parseValueSymbolTable(VSTOffset))
+ return EC;
+ SeenValueSymbolTable = true;
+ return std::error_code();
+ } else {
+ // If we have a VST forward declaration record, but have already
+ // parsed the VST (just above, when the first function body was
+ // encountered here), then we are resuming the parse after
+ // materializing functions. The NextUnreadBit points to the start
+ // of the last function block recorded in the VST (set when
+ // parsing the VST function entries). Skip it.
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ continue;
+ }
+ }
+
+ // Support older bitcode files that did not have the function
+ // index in the VST, nor a VST forward declaration record.
+ // Build the DeferredFunctionInfo vector on the fly.
if (std::error_code EC = rememberAndSkipFunctionBody())
return EC;
// Suspend parsing when we reach the function bodies. Subsequent
return error("Invalid record");
ValueList.shrinkTo(Record[0]);
break;
+ /// MODULE_CODE_VSTOFFSET: [offset]
+ case bitc::MODULE_CODE_VSTOFFSET:
+ if (Record.size() < 1)
+ return error("Invalid record");
+ VSTOffset = Record[0];
+ break;
}
Record.clear();
}
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
+ // This is the fallback handling for the old format bitcode that
+ // didn't contain the function index in the VST. Assert if we end up
+ // here for the new format (which is the only time the VSTOffset would
+ // be non-zero).
+ assert(VSTOffset == 0);
if (Stream.AtEndOfStream())
return error("Could not find function in stream");
// ParseModule will parse the next body in the stream and set its
}
}
-// Emit top-level description of module, including target triple, inline asm,
-// descriptors for global variables, and function prototype info.
-static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
+/// Write a record that will eventually hold the word offset of the
+/// module-level VST. For now the offset is 0, which will be backpatched
+/// after the real VST is written. Returns the bit offset to backpatch.
+static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST,
+ BitstreamWriter &Stream) {
+ if (VST.empty()) return 0;
+
+ // Write a placeholder value in for the offset of the real VST,
+ // which is written after the function blocks so that it can include
+ // the offset of each function. The placeholder offset will be
+ // updated when the real VST is written.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET));
+ // Blocks are 32-bit aligned, so we can use a 32-bit word offset to
+ // hold the real VST offset. Must use fixed instead of VBR as we don't
+ // know how many VBR chunks to reserve ahead of time.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit the placeholder
+ ArrayRef<unsigned> Vals = {llvm::bitc::MODULE_CODE_VSTOFFSET, 0};
+ Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals);
+
+ // Compute and return the bit offset to the placeholder, which will be
+ // patched when the real VST is written. We can simply subtract the 32-bit
+ // fixed size from the current bit number to get the location to backpatch.
+ return Stream.GetCurrentBitNo() - 32;
+}
+
+/// Emit top-level description of module, including target triple, inline asm,
+/// descriptors for global variables, and function prototype info.
+/// Returns the bit offset to backpatch with the location of the real VST.
+static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
// Emit various pieces of data attached to a module.
if (!M->getTargetTriple().empty())
WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
Vals.clear();
}
+
+ uint64_t VSTOffsetPlaceholder =
+ WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream);
+ return VSTOffsetPlaceholder;
}
static uint64_t GetOptimizationFlags(const Value *V) {
return SE_Fixed7;
}
-// Emit names for globals/functions etc.
-static void WriteValueSymbolTable(const ValueSymbolTable &VST,
- const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
- if (VST.empty()) return;
+/// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
+/// BitcodeStartBit and FunctionIndex are only passed for the module-level
+/// VST, where we are including a function bitcode index and need to
+/// backpatch the VST forward declaration record.
+static void WriteValueSymbolTable(
+ const ValueSymbolTable &VST, const ValueEnumerator &VE,
+ BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0,
+ uint64_t BitcodeStartBit = 0,
+ DenseMap<const Function *, uint64_t> *FunctionIndex = nullptr) {
+ if (VST.empty()) {
+ // WriteValueSymbolTableForwardDecl should have returned early as
+ // well. Ensure this handling remains in sync by asserting that
+ // the placeholder offset is not set.
+ assert(VSTOffsetPlaceholder == 0);
+ return;
+ }
+
+ if (VSTOffsetPlaceholder > 0) {
+ // Get the offset of the VST we are writing, and backpatch it into
+ // the VST forward declaration record.
+ uint64_t VSTOffset = Stream.GetCurrentBitNo();
+ // The BitcodeStartBit was the stream offset of the actual bitcode
+ // (e.g. excluding any initial darwin header).
+ VSTOffset -= BitcodeStartBit;
+ assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
+ Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
+ }
+
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+ // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY
+ // records, which are not used in the per-function VSTs.
+ unsigned FnEntry8BitAbbrev;
+ unsigned FnEntry7BitAbbrev;
+ unsigned FnEntry6BitAbbrev;
+ if (VSTOffsetPlaceholder > 0) {
+ // 8-bit fixed-width VST_FNENTRY function strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 7-bit fixed width VST_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 6-bit char6 VST_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<unsigned, 64> NameVals;
for (const ValueName &Name : VST) {
-
// Figure out the encoding to use for the name.
StringEncoding Bits =
getStringEncoding(Name.getKeyData(), Name.getKeyLength());
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+ NameVals.push_back(VE.getValueID(Name.getValue()));
+
+ Function *F = dyn_cast<Function>(Name.getValue());
+ if (!F) {
+ // If value is an alias, need to get the aliased base object to
+ // see if it is a function.
+ auto *GA = dyn_cast<GlobalAlias>(Name.getValue());
+ if (GA && GA->getBaseObject())
+ F = dyn_cast<Function>(GA->getBaseObject());
+ }
// VST_ENTRY: [valueid, namechar x N]
+ // VST_FNENTRY: [valueid, funcoffset, namechar x N]
// VST_BBENTRY: [bbid, namechar x N]
unsigned Code;
if (isa<BasicBlock>(Name.getValue())) {
Code = bitc::VST_CODE_BBENTRY;
if (Bits == SE_Char6)
AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else if (F && !F->isDeclaration()) {
+ // Must be the module-level VST, where we pass in the Index and
+ // have a VSTOffsetPlaceholder. The function-level VST should not
+ // contain any Function symbols.
+ assert(FunctionIndex);
+ assert(VSTOffsetPlaceholder > 0);
+
+ // Save the word offset of the function (from the start of the
+ // actual bitcode written to the stream).
+ assert(FunctionIndex->count(F) == 1);
+ uint64_t BitcodeIndex = (*FunctionIndex)[F] - BitcodeStartBit;
+ assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
+ NameVals.push_back(BitcodeIndex / 32);
+
+ Code = bitc::VST_CODE_FNENTRY;
+ AbbrevToUse = FnEntry8BitAbbrev;
+ if (Bits == SE_Char6)
+ AbbrevToUse = FnEntry6BitAbbrev;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = FnEntry7BitAbbrev;
} else {
Code = bitc::VST_CODE_ENTRY;
if (Bits == SE_Char6)
AbbrevToUse = VST_ENTRY_7_ABBREV;
}
- NameVals.push_back(VE.getValueID(Name.getValue()));
for (const char *P = Name.getKeyData(),
*E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
NameVals.push_back((unsigned char)*P);
/// WriteFunction - Emit a function body to the module stream.
static void WriteFunction(const Function &F, ValueEnumerator &VE,
- BitstreamWriter &Stream) {
+ BitstreamWriter &Stream,
+ DenseMap<const Function *, uint64_t> &FunctionIndex) {
+ // Save the bitcode index of the start of this function block for recording
+ // in the VST.
+ uint64_t BitcodeIndex = Stream.GetCurrentBitNo();
+ FunctionIndex[&F] = BitcodeIndex;
+
Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
VE.incorporateFunction(F);
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream,
- bool ShouldPreserveUseListOrder) {
+ bool ShouldPreserveUseListOrder,
+ uint64_t BitcodeStartBit) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
// Emit top-level description of module, including target triple, inline asm,
// descriptors for global variables, and function prototype info.
- WriteModuleInfo(M, VE, Stream);
+ uint64_t VSTOffsetPlaceholder = WriteModuleInfo(M, VE, Stream);
// Emit constants.
WriteModuleConstants(VE, Stream);
// Emit metadata.
WriteModuleMetadataStore(M, Stream);
- // Emit names for globals/functions etc.
- WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
-
// Emit module-level use-lists.
if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(nullptr, VE, Stream);
// Emit function bodies.
+ DenseMap<const Function *, uint64_t> FunctionIndex;
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
if (!F->isDeclaration())
- WriteFunction(*F, VE, Stream);
+ WriteFunction(*F, VE, Stream, FunctionIndex);
+
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
+ VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
Stream.ExitBlock();
}
// Emit the module into the buffer.
{
BitstreamWriter Stream(Buffer);
+ // Save the start bit of the actual bitcode, in case there is space
+ // saved at the start for the darwin header above. The reader stream
+ // will start at the bitcode, and we need the offset of the VST
+ // to line up.
+ uint64_t BitcodeStartBit = Stream.GetCurrentBitNo();
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit(0xD, 4);
// Emit the module.
- WriteModule(M, Stream, ShouldPreserveUseListOrder);
+ WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit);
}
if (TT.isOSDarwin())