1 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
3 // This library implements the functionality defined in llvm/Bytecode/Reader.h
5 // Note that this library should be as fast as possible, reentrant, and
8 // TODO: Make error message outputs be configurable depending on an option?
9 // TODO: Allow passing in an option to ignore the symbol table
11 //===----------------------------------------------------------------------===//
13 #include "ReaderInternals.h"
14 #include "llvm/Bytecode/Reader.h"
15 #include "llvm/Bytecode/Format.h"
16 #include "llvm/GlobalVariable.h"
17 #include "llvm/Module.h"
18 #include "llvm/Constants.h"
19 #include "llvm/iPHINode.h"
20 #include "llvm/iOther.h"
21 #include <sys/types.h>
// getTypeSlot - Compute the slot number that identifies type Ty and store it
// in Slot. Primitive types map directly to their primitive ID; any other type
// is looked up in MethodTypeValues first and then in ModuleTypeValues. The
// visible code returns true when the type cannot be found in either list.
// NOTE(review): this excerpt does not show every line of the body (closing
// braces, else branches and presumably the success return are missing), so
// the comments here describe only the statements that are visible.
31 bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
32 if (Ty->isPrimitiveType()) {
33 Slot = Ty->getPrimitiveID();
35 // Check the method level types first...
36 TypeValuesListTy::iterator I = find(MethodTypeValues.begin(),
37 MethodTypeValues.end(), Ty);
38 if (I != MethodTypeValues.end()) {
// Method level slots are numbered after every module level type: the slot
// is FirstDerivedTyID + ModuleTypeValues.size() + the index of the match
// within MethodTypeValues.
39 Slot = FirstDerivedTyID+ModuleTypeValues.size()+
40 (&*I - &MethodTypeValues[0]);
// Otherwise search the module level types (presumably the else branch of the
// test above -- the brace lines are not visible here).
42 I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty);
43 if (I == ModuleTypeValues.end()) return true; // Didn't find type!
// Module level slots start directly at FirstDerivedTyID.
44 Slot = FirstDerivedTyID + (&*I - &ModuleTypeValues[0]);
47 //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << "\n";
// getType - Translate a type ID read from the bytecode stream into a Type.
// The ID is first offered to Type::getPrimitiveType; the visible fallback
// looks the ID up in the Type::TypeTy plane via getValue (never creating a
// placeholder) and reports failure when nothing is found.
// NOTE(review): the lines that consume T and that return the looked-up value
// are not visible in this excerpt.
51 const Type *BytecodeParser::getType(unsigned ID) {
52 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
55 //cerr << "Looking up Type ID: " << ID << "\n";
57 const Value *D = getValue(Type::TypeTy, ID, false);
58 if (D == 0) return failure<const Type*>(0);
// insertValue - Append Val to the plane of ValueTab selected by the type slot
// of Val's type, growing ValueTab when that plane does not exist yet.
// Returns the index of the new entry within its plane, or failure<int>(-1)
// when getTypeSlot cannot map the type.
// NOTE(review): the declaration of the local 'type' is not visible in this
// excerpt.
63 int BytecodeParser::insertValue(Value *Val, std::vector<ValueList> &ValueTab) {
65 if (getTypeSlot(Val->getType(), type)) return failure<int>(-1);
66 assert(type != Type::TypeTyID && "Types should never be insertValue'd!");
// Make sure a plane exists for this type slot before indexing into it.
68 if (ValueTab.size() <= type)
69 ValueTab.resize(type+1, ValueList());
71 //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
72 // << "] = " << Val << "\n";
73 ValueTab[type].push_back(Val);
// The value was appended, so its index is the last position in the plane.
75 return ValueTab[type].size()-1;
// getValue - Find the value of type Ty that lives in slot oNum.
// Lookup order in the visible code: the implicit type plane, then
// ModuleValues, then Values. When nothing is found and Create is false the
// call fails; otherwise a placeholder object is created and recorded so that
// it can be resolved later.
// NOTE(review): 'Num' and 'd' are used without a visible declaration (Num is
// presumably a working copy of oNum), and several closing braces / returns
// are missing from this excerpt -- confirm against the full file.
78 Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
80 unsigned type; // The type plane it lives in...
82 if (getTypeSlot(Ty, type)) return failure<Value*>(0); // TODO: true
84 if (type == Type::TypeTyID) { // The 'type' plane has implicit values
// Callers must not request placeholder creation for types.
85 assert(Create == false);
86 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
87 if (T) return (Value*)T; // Asked for a primitive type...
89 // Otherwise, derived types need offset...
90 Num -= FirstDerivedTyID;
92 // Is it a module level type?
93 if (Num < ModuleTypeValues.size())
94 return (Value*)ModuleTypeValues[Num].get();
96 // Nope, is it a method level type?
97 Num -= ModuleTypeValues.size();
98 if (Num < MethodTypeValues.size())
99 return (Value*)MethodTypeValues[Num].get();
// Ordinary values: module level entries occupy the low slot numbers of a
// plane. If Num lies beyond them it is rebased so that it indexes the
// Values table instead.
104 if (type < ModuleValues.size()) {
105 if (Num < ModuleValues[type].size())
106 return ModuleValues[type][Num];
107 Num -= ModuleValues[type].size();
110 if (Values.size() > type && Values[type].size() > Num)
111 return Values[type][Num];
113 if (!Create) return failure<Value*>(0); // Do not create a placeholder?
// Not defined yet: build a placeholder whose class depends on the primitive
// ID of Ty. The placeholder is constructed with the original slot number.
116 switch (Ty->getPrimitiveID()) {
117 case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break;
118 case Type::FunctionTyID:
// NOTE(review): this diagnostic is written to cerr unconditionally, unlike
// the BCR_TRACE output used elsewhere in this file.
119 cerr << "Creating method pholder! : " << type << ":" << oNum << " "
120 << Ty->getName() << "\n";
121 d = new MethPHolder(Ty, oNum);
// Function placeholders are recorded in LateResolveModuleValues.
// NOTE(review): d does not appear to be released on this failure path.
122 if (insertValue(d, LateResolveModuleValues) ==-1) return failure<Value*>(0);
124 default: d = new DefPHolder(Ty, oNum); break;
127 assert(d != 0 && "How did we not make something?");
// Placeholders that reach this point are recorded in LateResolveValues.
// NOTE(review): d does not appear to be released on this failure path either.
128 if (insertValue(d, LateResolveValues) == -1) return failure<Value*>(0);
// postResolveValues - Walk every plane of ValTab and try to replace each
// placeholder with the real value now found under the same type and ID.
// Uses of the placeholder are redirected to the resolved value and the
// placeholder is deleted. An unresolved reference is reported on cerr and
// flagged through Error.
// NOTE(review): the declarations of 'Size' and 'Error', the test guarding the
// error branch, the statement that removes the entry from DL (needed for the
// while loop to terminate) and the final return are not visible in this
// excerpt. 'Error' is assigned a bool here but string messages elsewhere in
// the file, so it is presumably a local flag -- confirm.
132 bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
134 for (unsigned ty = 0; ty < ValTab.size(); ++ty) {
135 ValueList &DL = ValTab[ty];
// Process the plane from the back until it is empty.
137 while ((Size = DL.size())) {
138 unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
140 Value *D = DL[Size-1];
// Look for the real definition; never create another placeholder here.
143 Value *NewDef = getValue(D->getType(), IDNumber, false);
145 Error = true; // Unresolved thinger
146 cerr << "Unresolvable reference found: <"
147 << D->getType()->getDescription() << ">:" << IDNumber << "!\n";
149 // Fixup all of the uses of this placeholder def...
150 D->replaceAllUsesWith(NewDef);
152 // Now that all the uses are gone, delete the placeholder...
153 // If we couldn't find a def (error case), then leak a little
154 delete D; // memory, 'cause otherwise we can't remove all uses!
// ParseBasicBlock - Read instructions from [Buf, EndBuf) into a newly
// allocated BasicBlock stored in BB. Each parsed instruction is registered
// in Values and appended to the block's instruction list. The visible
// failure paths delete the block before returning failure(true).
// NOTE(review): the rest of the parameter list, the declaration of 'Inst',
// any cleanup before the first failure return and the success return are not
// visible in this excerpt.
162 bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
164 BB = new BasicBlock();
166 while (Buf < EndBuf) {
168 if (ParseInstruction(Buf, EndBuf, Inst)) {
170 return failure(true);
// A null instruction, or one that cannot be registered, aborts the block.
173 if (Inst == 0) { delete BB; return failure(true); }
174 if (insertValue(Inst, Values) == -1) { delete BB; return failure(true); }
176 BB->getInstList().push_back(Inst);
// ParseSymbolTable - Read symbol table planes from [Buf, EndBuf). Each plane
// starts with an entry count and a type ID, followed by (slot, name) pairs.
// Every slot is resolved with getValue (no placeholder creation) and the
// resulting value is given its name via setName(Name, ST).
// Fails on any read error, unknown type, unresolvable slot, or when the read
// position ends up past EndBuf.
// NOTE(review): the remainder of the parameter list (which declares ST), the
// declarations of 'slot' and 'Name', the null test on D and the success
// return are not visible in this excerpt.
184 bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf,
186 while (Buf < EndBuf) {
187 // Symtab block header: [num entries][type id number]
188 unsigned NumEntries, Typ;
189 if (read_vbr(Buf, EndBuf, NumEntries) ||
190 read_vbr(Buf, EndBuf, Typ)) return failure(true);
191 const Type *Ty = getType(Typ);
192 if (Ty == 0) return failure(true);
194 BCR_TRACE(3, "Plane Type: '" << Ty << "' with " << NumEntries <<
197 for (unsigned i = 0; i < NumEntries; ++i) {
198 // Symtab entry: [def slot #][name]
200 if (read_vbr(Buf, EndBuf, slot)) return failure(true);
202 if (read(Buf, EndBuf, Name, false)) // Not aligned...
203 return failure(true);
205 Value *D = getValue(Ty, slot, false); // Find mapping...
207 BCR_TRACE(3, "FAILED LOOKUP: Slot #" << slot << "\n");
208 return failure(true);
210 BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << D;
211 if (!isa<Instruction>(D)) cerr << "\n");
213 D->setName(Name, ST);
// Reading beyond the end of the block means the table was malformed.
217 if (Buf > EndBuf) return failure(true);
221 // DeclareNewGlobalValue - Patch up forward references to global values in the
222 // form of ConstantPointerRef.
// GV is the real global that now occupies slot Slot. If GlobalRefs holds a
// placeholder under the key (GV->getType(), Slot), every user of that
// placeholder is redirected to GV and the placeholder is removed from GV's
// parent and deleted.
// NOTE(review): GV->getParent() is dereferenced, so GV apparently must already
// belong to a module when this is called. The statement that erases the map
// entry (announced by the final comment) and the closing braces are not
// visible in this excerpt.
224 void BytecodeParser::DeclareNewGlobalValue(GlobalValue *GV, unsigned Slot) {
225 // Check to see if there is a forward reference to this global variable...
226 // if there is, eliminate it and patch the reference to use the new def'n.
227 GlobalRefsType::iterator I = GlobalRefs.find(make_pair(GV->getType(), Slot));
229 if (I != GlobalRefs.end()) {
230 GlobalVariable *OldGV = I->second; // Get the placeholder...
231 BCR_TRACE(3, "Mutating CPPR Forward Ref!\n");
233 // Loop over all of the uses of the GlobalValue. The only thing they are
234 // allowed to be at this point is ConstantPointerRef's.
235 assert(OldGV->use_size() == 1 && "Only one reference should exist!");
236 while (!OldGV->use_empty()) {
237 User *U = OldGV->use_back(); // Must be a ConstantPointerRef...
238 ConstantPointerRef *CPPR = cast<ConstantPointerRef>(U);
239 assert(CPPR->getValue() == OldGV && "Something isn't happy");
241 BCR_TRACE(4, "Mutating Forward Ref!\n");
243 // Change the const pool reference to point to the real global variable
244 // now. This should drop a use from the OldGV.
245 CPPR->mutateReference(GV);
248 // Remove the placeholder (OldGV) from the global list of GV's parent...
249 GV->getParent()->getGlobalList().remove(OldGV);
250 delete OldGV; // Delete the old placeholder
252 // Remove the map entry for the global now that it has been created...
// ParseMethod - Parse one function body from [Buf, EndBuf).
// The function's pointer type and ModuleValues slot are taken from the front
// of MethodSignatureList. A new Function is created, one Argument per
// parameter type is registered in Values, and the nested blocks (constant
// pool, basic blocks, symbol table) are parsed. Afterwards late references
// are resolved, the value previously stored in the function's slot is
// replaced by the new Function, and DeclareNewGlobalValue patches forward
// references. Returns failure(true) on error; most visible failure paths
// delete the partially built Function first.
// NOTE(review): many short lines of this function are not visible in this
// excerpt (rest of the parameter list, declarations of isInternal/Type/Size/
// BB, the switch header, break statements, closing braces, cleanup of the
// placeholder and the final return). 'C' is presumably the module parameter.
257 bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
259 // Clear out the local values table...
261 if (MethodSignatureList.empty()) {
262 Error = "Function found, but FunctionSignatureList empty!";
263 return failure(true); // Unexpected method!
// The pending signature is a (pointer-to-function type, slot) pair.
266 const PointerType *PMTy = MethodSignatureList.front().first; // PtrMeth
267 const FunctionType *MTy = dyn_cast<FunctionType>(PMTy->getElementType());
268 if (MTy == 0) return failure(true); // Not ptr to method!
271 if (read_vbr(Buf, EndBuf, isInternal)) return failure(true);
// Consume the signature entry now that the function is being materialized.
273 unsigned MethSlot = MethodSignatureList.front().second;
274 MethodSignatureList.pop_front();
275 Function *M = new Function(MTy, isInternal != 0);
277 BCR_TRACE(2, "METHOD TYPE: " << MTy << "\n");
// Create one Argument per parameter type and register it in Values before
// adding it to the function's argument list.
// NOTE(review): when insertValue fails, FA has not yet been added to M, so
// 'delete M' does not appear to release it -- confirm.
279 const FunctionType::ParamTypes &Params = MTy->getParamTypes();
280 for (FunctionType::ParamTypes::const_iterator It = Params.begin();
281 It != Params.end(); ++It) {
282 Argument *FA = new Argument(*It);
283 if (insertValue(FA, Values) == -1) {
284 Error = "Error reading method arguments!\n";
285 delete M; return failure(true);
287 M->getArgumentList().push_back(FA);
// Parse the nested blocks that make up the function body.
290 while (Buf < EndBuf) {
292 const uchar *OldBuf = Buf;
293 if (readBlock(Buf, EndBuf, Type, Size)) {
294 Error = "Error reading Function level block!";
295 delete M; return failure(true);
299 case BytecodeFormat::ConstantPool:
300 BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n");
// Function-local constants go into Values, local types into
// MethodTypeValues.
301 if (ParseConstantPool(Buf, Buf+Size, Values, MethodTypeValues)) {
302 delete M; return failure(true);
306 case BytecodeFormat::BasicBlock: {
307 BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n");
309 if (ParseBasicBlock(Buf, Buf+Size, BB) ||
310 insertValue(BB, Values) == -1) {
311 delete M; return failure(true); // Parse error... :(
314 M->getBasicBlockList().push_back(BB);
318 case BytecodeFormat::SymbolTable:
319 BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
320 if (ParseSymbolTable(Buf, Buf+Size, M->getSymbolTableSure())) {
321 delete M; return failure(true);
326 BCR_TRACE(2, "BLOCK <unknown>:ignored! {\n");
// NOTE(review): unlike the neighbouring failure paths, this return does not
// delete M in the visible code.
328 if (OldBuf > Buf) return failure(true); // Wrap around!
331 BCR_TRACE(2, "} end block\n");
333 if (align32(Buf, EndBuf)) {
334 Error = "Error aligning Function level block!";
335 delete M; // Malformed bc file, read past end of block.
336 return failure(true);
// Resolve the forward references collected while parsing the body.
340 if (postResolveValues(LateResolveValues) ||
341 postResolveValues(LateResolveModuleValues)) {
342 Error = "Error resolving method values!";
343 delete M; return failure(true); // Unresolvable references!
// Fetch the value currently stored in this function's module level slot
// (presumably the placeholder inserted by ParseModuleGlobalInfo).
346 Value *MethPHolder = getValue(PMTy, MethSlot, false);
347 assert(MethPHolder && "Something is broken no placeholder found!");
348 assert(isa<Function>(MethPHolder) && "Not a function?");
350 unsigned type; // Type slot
// NOTE(review): the getTypeSlot call inside assert() is compiled out when
// NDEBUG is defined; 'type' is still assigned by the call on the next line.
351 assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
352 getTypeSlot(PMTy, type);
354 C->getFunctionList().push_back(M);
356 // Replace placeholder with the real method pointer...
357 ModuleValues[type][MethSlot] = M;
359 // Clear out method level types...
360 MethodTypeValues.clear();
362 // If anyone is using the placeholder make them use the real method instead
363 MethPHolder->replaceAllUsesWith(M);
365 // We don't need the placeholder anymore!
368 // If the method is empty, we don't need the method argument entries...
370 M->getArgumentList().clear();
// Patch any ConstantPointerRef forward references to this function.
372 DeclareNewGlobalValue(M, MethSlot);
// ParseModuleGlobalInfo - Read the module's global variable list and the list
// of function signatures from [Buf, End). Both lists are terminated by
// Type::VoidTyID. Each global variable is created, registered in
// ModuleValues and added to Mod's global list; each function signature gets
// a MethPHolder in ModuleValues and an entry in MethodSignatureList that is
// consumed later when the function body is parsed.
// NOTE(review): the rest of the parameter list (which presumably declares
// Mod), the declarations of VarType/InitSlot/TypeSlot, the tail of the
// GlobalVariable constructor call, closing braces and the final return are
// not visible in this excerpt.
377 bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
379 if (!MethodSignatureList.empty()) {
380 Error = "Two ModuleGlobalInfo packets found!";
381 return failure(true); // Two ModuleGlobal blocks?
384 // Read global variables...
386 if (read_vbr(Buf, End, VarType)) return failure(true);
387 while (VarType != Type::VoidTyID) { // List is terminated by Void
388 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer,
389 // bit2 = isInternal, bit3+ = slot#
390 const Type *Ty = getType(VarType >> 3);
// NOTE(review): when getType returns null this branch is taken and
// Ty->getDescription() dereferences the null pointer while building the
// message.
391 if (!Ty || !isa<PointerType>(Ty)) {
392 Error = "Global not pointer type! Ty = " + Ty->getDescription();
393 return failure(true);
// The global's value type is the pointee of the recorded pointer type.
396 const PointerType *PTy = cast<const PointerType>(Ty);
397 const Type *ElTy = PTy->getElementType();
399 Constant *Initializer = 0;
400 if (VarType & 2) { // Does it have an initializer?
401 // Do not improvise... values must have been stored in the constant pool,
402 // which should have been read before now.
405 if (read_vbr(Buf, End, InitSlot)) return failure(true);
407 Value *V = getValue(ElTy, InitSlot, false);
408 if (V == 0) return failure(true);
409 Initializer = cast<Constant>(V);
412 // Create the global variable...
413 GlobalVariable *GV = new GlobalVariable(ElTy, VarType & 1, VarType & 4,
// NOTE(review): GV does not appear to be released if insertValue fails.
415 int DestSlot = insertValue(GV, ModuleValues);
416 if (DestSlot == -1) return failure(true);
418 Mod->getGlobalList().push_back(GV);
// Resolve forward references that were recorded for this slot.
420 DeclareNewGlobalValue(GV, unsigned(DestSlot));
422 BCR_TRACE(2, "Global Variable of type: " << PTy->getDescription()
423 << " into slot #" << DestSlot << "\n");
// Fetch the next list entry (or the Void terminator).
425 if (read_vbr(Buf, End, VarType)) return failure(true);
428 // Read the method signatures for all of the methods that are coming, and
429 // create fillers in the Value tables.
430 unsigned MethSignature;
431 if (read_vbr(Buf, End, MethSignature)) return failure(true);
432 while (MethSignature != Type::VoidTyID) { // List is terminated by Void
433 const Type *Ty = getType(MethSignature);
// NOTE(review): as above, a null Ty reaches Ty->getDescription() here.
434 if (!Ty || !isa<PointerType>(Ty) ||
435 !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
436 Error = "Function not ptr to func type! Ty = " + Ty->getDescription();
437 return failure(true);
440 // We create methods by passing the underlying FunctionType to create...
441 Ty = cast<PointerType>(Ty)->getElementType();
443 // When the ModuleGlobalInfo section is read, we load the type of each
444 // method and the 'ModuleValues' slot that it lands in. We then load a
445 // placeholder into its slot to reserve it. When the method is loaded, this
446 // placeholder is replaced.
448 // Insert the placeholder...
// NOTE(review): Val does not appear to be released if insertValue fails.
449 Value *Val = new MethPHolder(Ty, 0);
450 if (insertValue(Val, ModuleValues) == -1) return failure(true);
452 // Figure out which entry of its typeslot it went into...
454 if (getTypeSlot(Val->getType(), TypeSlot)) return failure(true);
// The placeholder was just appended, so it is the last entry of its plane.
456 unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
458 // Keep track of this information in a linked list that is emptied as
459 // methods are loaded...
461 MethodSignatureList.push_back(
462 make_pair(cast<const PointerType>(Val->getType()), SlotNo));
463 if (read_vbr(Buf, End, MethSignature)) return failure(true);
464 BCR_TRACE(2, "Function of type: " << Ty << "\n");
467 if (align32(Buf, End)) return failure(true);
469 // This is for future proofing... in the future extra fields may be added that
470 // we don't understand, so we transparently ignore them.
// ParseModule - Parse the outermost Module block, which must span exactly
// [Buf, EndBuf). Reads FirstDerivedTyID, allocates a new Module (stored in
// both TheModule and C) and then dispatches every nested block to the
// matching parser: constant pool, module global info, function, symbol
// table. Fails if any nested parser fails or if function signatures remain
// unconsumed at the end of the stream.
// NOTE(review): the rest of the parameter list (presumably declaring C), the
// declarations of Type/Size, the switch header, break statements, closing
// braces and the success return are not visible in this excerpt.
476 bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
480 if (readBlock(Buf, EndBuf, Type, Size)) return failure(true);
// The top level block must be a Module whose payload ends exactly at EndBuf.
481 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) {
482 Error = "Expected Module packet!";
483 return failure(true); // Hrm, not a class?
486 BCR_TRACE(0, "BLOCK BytecodeFormat::Module: {\n");
487 MethodSignatureList.clear(); // Just in case...
489 // Read into instance variables...
490 if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return failure(true);
491 if (align32(Buf, EndBuf)) return failure(true);
492 BCR_TRACE(1, "FirstDerivedTyID = " << FirstDerivedTyID << "\n");
494 TheModule = C = new Module();
495 while (Buf < EndBuf) {
496 const uchar *OldBuf = Buf;
497 if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return failure(true); }
499 case BytecodeFormat::ConstantPool:
500 BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n");
// Module level constants and types go into the module level tables.
501 if (ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues)) {
502 delete C; return failure(true);
506 case BytecodeFormat::ModuleGlobalInfo:
507 BCR_TRACE(1, "BLOCK BytecodeFormat::ModuleGlobalInfo: {\n");
509 if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
510 delete C; return failure(true);
514 case BytecodeFormat::Function: {
515 BCR_TRACE(1, "BLOCK BytecodeFormat::Function: {\n");
516 if (ParseMethod(Buf, Buf+Size, C)) {
517 delete C; return failure(true); // Error parsing method
522 case BytecodeFormat::SymbolTable:
523 BCR_TRACE(1, "BLOCK BytecodeFormat::SymbolTable: {\n");
524 if (ParseSymbolTable(Buf, Buf+Size, C->getSymbolTableSure())) {
525 delete C; return failure(true);
530 Error = "Expected Module Block!";
// NOTE(review): unlike the other failure paths in this loop, this return
// does not delete C in the visible code.
532 if (OldBuf > Buf) return failure(true); // Wrap around!
535 BCR_TRACE(1, "} end block\n");
536 if (align32(Buf, EndBuf)) { delete C; return failure(true); }
// Every signature announced by ModuleGlobalInfo must have been matched by a
// function block. NOTE(review): C is not deleted on this failure path either.
539 if (!MethodSignatureList.empty()) { // Expected more methods!
540 Error = "Function expected, but bytecode stream at end!";
541 return failure(true);
544 BCR_TRACE(0, "} end block\n\n");
// ParseBytecode - Entry point of the parser for an in-memory buffer
// [Buf, EndBuf). Clears LateResolveValues, checks the leading signature word
// and hands the remainder to ParseModule. Returns 0 when the signature is
// wrong or the module cannot be parsed.
// NOTE(review): the declarations of 'Sig' and 'Result' and the success return
// are not visible in this excerpt.
548 Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
549 LateResolveValues.clear();
551 // Read and check signature...
// The expected word packs the characters 'l','l','v','m' with the first
// character in the lowest byte.
552 if (read(Buf, EndBuf, Sig) ||
553 Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) {
554 Error = "Invalid bytecode signature!";
555 return failure<Module*>(0); // Invalid signature!
559 if (ParseModule(Buf, EndBuf, Result)) return 0;
// ParseBytecodeBuffer - Parse the Length bytes starting at Buffer with a
// fresh BytecodeParser and return whatever ParseBytecode returns. The
// parser's error message is not passed back to the caller by this function.
564 Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
565 BytecodeParser Parser;
566 return Parser.ParseBytecode(Buffer, Buffer+Length);
569 // Parse the bytecode in the named file (or standard input) into a Module...
// Filename "-" selects standard input; any other name is opened, mmap'ed
// read-only and parsed in place. If ErrorStr is non-null it receives either
// one of the fixed messages set here or the parser's own error string.
// NOTE(review): the declarations of Result/StatBuf/BlockSize/FileSize, the
// conditions guarding several error branches, the remaining mmap arguments,
// any preprocessor conditionals and the final return are not visible in this
// excerpt.
571 Module *ParseBytecodeFile(const std::string &Filename, std::string *ErrorStr) {
575 if (Filename != std::string("-")) { // Read from a file...
576 int FD = open(Filename.c_str(), O_RDONLY);
578 if (ErrorStr) *ErrorStr = "Error opening file!";
579 return failure<Module*>(0);
// NOTE(review): this failure path does not set *ErrorStr.
582 if (fstat(FD, &StatBuf) == -1) { close(FD); return failure<Module*>(0); }
// NOTE(review): st_size is narrowed to int here.
584 int Length = StatBuf.st_size;
586 if (ErrorStr) *ErrorStr = "Error stat'ing file!";
587 close(FD); return failure<Module*>(0);
589 uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
// mmap reports failure with the value (void*)-1.
591 if (Buffer == (uchar*)-1) {
592 if (ErrorStr) *ErrorStr = "Error mmapping file!";
593 close(FD); return failure<Module*>(0);
596 BytecodeParser Parser;
597 Result = Parser.ParseBytecode(Buffer, Buffer+Length);
599 munmap((char*)Buffer, Length);
601 if (ErrorStr) *ErrorStr = Parser.getError();
602 } else { // Read from stdin
// Accumulate standard input into a heap block that grows with each read.
// NOTE(review): each read requests only 4 bytes although Buffer holds 4096,
// the realloc result is not checked, and the read-error return does not set
// *ErrorStr.
605 uchar Buffer[4096], *FileData = 0;
606 while ((BlockSize = read(0, Buffer, 4))) {
607 if (BlockSize == -1) { free(FileData); return failure<Module*>(0); }
609 FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
610 memcpy(FileData+FileSize, Buffer, BlockSize);
611 FileSize += BlockSize;
615 if (ErrorStr) *ErrorStr = "Standard Input empty!";
616 free(FileData); return failure<Module*>(0);
// Two alternative definitions of Buf follow: a private anonymous mapping that
// receives a copy of the data, or the heap block itself. Presumably they are
// selected by a preprocessor conditional that is not visible here.
// NOTE(review): the mmap result is only checked by assert, which is compiled
// out when NDEBUG is defined.
621 uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
622 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
623 assert((Buf != (uchar*)-1) && "mmap returned error!");
624 memcpy(Buf, FileData, FileSize);
627 uchar *Buf = FileData;
630 BytecodeParser Parser;
631 Result = Parser.ParseBytecode(Buf, Buf+FileSize);
634 munmap((char*)Buf, FileSize); // Free mmap'd data area
636 free(FileData); // Free realloc'd block of memory
639 if (ErrorStr) *ErrorStr = Parser.getError();