From: Reid Spencer Date: Tue, 29 Jun 2004 23:29:38 +0000 (+0000) Subject: MAJOR REWRITE. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=060d25d1dcf0b917e6d6a840b1d9082552e9c842;p=oota-llvm.git MAJOR REWRITE. - stop passing Buf/BufEnd to every function (now member vars) - internalize things that used to be in a header file that no one else included/needed. - Remove defunct BCR_TRACE lines - Standardize error handling with the PARSE_ERROR macro. - Integrate ConstantReader.cpp and InstructionReader.cpp and reorgnize the definition order so that gcc has a chance at optimizing this module - Standardize case and style of method names. - Eliminate unneeded header files - Prepare for Type != Value (bug122) change by splitting Types into their own data structures. - Implement the BytecodeHandler interface calls. - Provide default implementation of BytecodeHandler interface. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14494 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 2ca8a99f4cd..353f29c4b2a 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -16,45 +16,163 @@ // //===----------------------------------------------------------------------===// -#include "ReaderInternals.h" -#include "llvm/Bytecode/Reader.h" +#include "Reader.h" +#include "llvm/Bytecode/BytecodeHandler.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/iMemory.h" +#include "llvm/iOther.h" +#include "llvm/iPHINode.h" +#include "llvm/iTerminators.h" #include "llvm/Bytecode/Format.h" -#include "llvm/Module.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "Support/StringExtras.h" +#include + using namespace llvm; -unsigned BytecodeParser::getTypeSlot(const Type *Ty) { - if (Ty->isPrimitiveType()) - return Ty->getTypeID(); +/// A convenience macro for calling the handler. Makes maintenance easier in +/// case the interface to handler methods changes. +#define HANDLE(method) \ + if ( Handler ) Handler->handle ## method - // Scan the compaction table for the type if needed. - if (CompactionTable.size() > Type::TypeTyID) { - std::vector &Plane = CompactionTable[Type::TypeTyID]; - if (!Plane.empty()) { - std::vector::iterator I = find(Plane.begin(), Plane.end(), - const_cast(Ty)); - if (I == Plane.end()) - throw std::string("Couldn't find type specified in compaction table!"); - return Type::FirstDerivedTyID + (&*I - &Plane[0]); - } +/// A convenience macro for handling parsing errors. +#define PARSE_ERROR(inserters) { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + throw std::string(errormsg.str()); \ } - // Check the function level types first... - TypeValuesListTy::iterator I = find(FunctionTypeValues.begin(), - FunctionTypeValues.end(), Ty); - if (I != FunctionTypeValues.end()) - return Type::FirstDerivedTyID + ModuleTypeValues.size() + - (&*I - &FunctionTypeValues[0]); +/// @brief A class for maintaining the slot number definition +/// as a placeholder for the actual definition. +template +class PlaceholderDef : public SuperType { + unsigned ID; + PlaceholderDef(); // DO NOT IMPLEMENT + void operator=(const PlaceholderDef &); // DO NOT IMPLEMENT +public: + PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {} + unsigned getID() { return ID; } +}; + +struct ConstantPlaceHolderHelper : public ConstantExpr { + ConstantPlaceHolderHelper(const Type *Ty) + : ConstantExpr(Instruction::UserOp1, Constant::getNullValue(Ty), Ty) {} +}; + +typedef PlaceholderDef ConstPHolder; - I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty); - if (I == ModuleTypeValues.end()) - throw std::string("Didn't find type in ModuleTypeValues."); - return Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); +//===----------------------------------------------------------------------===// +// Bytecode Reading Methods +//===----------------------------------------------------------------------===// + +inline bool BytecodeReader::moreInBlock() { + return At < BlockEnd; +} + +inline void BytecodeReader::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); } -const Type *BytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +inline void BytecodeReader::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( At > Save ) + HANDLE(Alignment( At - Save )); + if (At > BlockEnd) + PARSE_ERROR("Ran out of data while aligning!"); +} +inline unsigned BytecodeReader::read_uint() { + if (At+4 > BlockEnd) + PARSE_ERROR("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned BytecodeReader::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + PARSE_ERROR("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + HANDLE(VBR32(At-Save)); + return Result; +} + +inline uint64_t BytecodeReader::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + PARSE_ERROR("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + HANDLE(VBR64(At-Save)); + return Result; +} + +inline int64_t BytecodeReader::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} + +inline std::string BytecodeReader::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + PARSE_ERROR("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void BytecodeReader::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + PARSE_ERROR("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} + +inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + PARSE_ERROR("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + HANDLE(Block( Type, BlockStart, Size )); +} + +//===----------------------------------------------------------------------===// +// IR Lookup Methods +//===----------------------------------------------------------------------===// + +inline bool BytecodeReader::hasImplicitNull(unsigned TyID ) { + if (!hasExplicitPrimitiveZeros) + return TyID != Type::LabelTyID && TyID != Type::TypeTyID && + TyID != Type::VoidTyID; + return TyID >= Type::FirstDerivedTyID; +} + +const Type *BytecodeReader::getType(unsigned ID) { if (ID < Type::FirstDerivedTyID) if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID)) return T; // Asked for a primitive type... @@ -62,98 +180,124 @@ const Type *BytecodeParser::getType(unsigned ID) { // Otherwise, derived types need offset... ID -= Type::FirstDerivedTyID; - if (CompactionTable.size() > Type::TypeTyID && - !CompactionTable[Type::TypeTyID].empty()) { - if (ID >= CompactionTable[Type::TypeTyID].size()) - throw std::string("Type ID out of range for compaction table!"); - return cast(CompactionTable[Type::TypeTyID][ID]); + if (!CompactionTypes.empty()) { + if (ID >= CompactionTypes.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypes[ID]; } // Is it a module-level type? - if (ID < ModuleTypeValues.size()) - return ModuleTypeValues[ID].get(); + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); - // Nope, is it a function-level type? - ID -= ModuleTypeValues.size(); - if (ID < FunctionTypeValues.size()) - return FunctionTypeValues[ID].get(); + // Nope, is it a function-level type? + ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); - throw std::string("Illegal type reference!"); + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; } -static inline bool hasImplicitNull(unsigned TyID, bool EncodesPrimitiveZeros) { - if (!EncodesPrimitiveZeros) - return TyID != Type::LabelTyID && TyID != Type::TypeTyID && - TyID != Type::VoidTyID; - return TyID >= Type::FirstDerivedTyID; -} +unsigned BytecodeReader::getTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); -unsigned BytecodeParser::insertValue(Value *Val, unsigned type, - ValueTable &ValueTab) { - assert((!isa(Val) || !cast(Val)->isNullValue()) || - !hasImplicitNull(type, hasExplicitPrimitiveZeros) && - "Cannot read null values from bytecode!"); - assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); + // Scan the compaction table for the type if needed. + if (!CompactionTypes.empty()) { + std::vector::const_iterator I = + find(CompactionTypes.begin(), CompactionTypes.end(), Ty); - if (ValueTab.size() <= type) - ValueTab.resize(type+1); + if (I == CompactionTypes.end()) + PARSE_ERROR("Couldn't find type specified in compaction table!"); + return Type::FirstDerivedTyID + (&*I - &CompactionTypes[0]); + } - if (!ValueTab[type]) ValueTab[type] = new ValueList(); + // Check the function level types first... + TypeListTy::iterator I = find(FunctionTypes.begin(), FunctionTypes.end(), Ty); - //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() - // << "] = " << Val << "\n"; - ValueTab[type]->push_back(Val); + if (I != FunctionTypes.end()) + return Type::FirstDerivedTyID + ModuleTypes.size() + + (&*I - &FunctionTypes[0]); - bool HasOffset = hasImplicitNull(type, hasExplicitPrimitiveZeros); - return ValueTab[type]->size()-1 + HasOffset; + // Check the module level types now... + I = find(ModuleTypes.begin(), ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + PARSE_ERROR("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } -Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) { +const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot); + assert(Ty && "Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypes.size()) + PARSE_ERROR("Illegal compaction table type reference!"); + return ModuleTypes[Slot]; +} + +unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); + TypeListTy::iterator I = find(ModuleTypes.begin(), + ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + PARSE_ERROR("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); +} + +Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { assert(type != Type::TypeTyID && "getValue() cannot get types!"); assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); unsigned Num = oNum; // If there is a compaction table active, it defines the low-level numbers. // If not, the module values define the low-level numbers. - if (CompactionTable.size() > type && !CompactionTable[type].empty()) { - if (Num < CompactionTable[type].size()) - return CompactionTable[type][Num]; - Num -= CompactionTable[type].size(); + if (CompactionValues.size() > type && !CompactionValues[type].empty()) { + if (Num < CompactionValues[type].size()) + return CompactionValues[type][Num]; + Num -= CompactionValues[type].size(); } else { - // If the type plane was compactified, figure out the global type ID. + // By default, the global type id is the type id passed in unsigned GlobalTyID = type; - if (CompactionTable.size() > Type::TypeTyID && - !CompactionTable[Type::TypeTyID].empty() && - type >= Type::FirstDerivedTyID) { - std::vector &TypePlane = CompactionTable[Type::TypeTyID]; - const Type *Ty = cast(TypePlane[type-Type::FirstDerivedTyID]); - TypeValuesListTy::iterator I = - find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty); - assert(I != ModuleTypeValues.end()); - GlobalTyID = Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); + + // If the type plane was compactified, figure out the global type ID + // by adding the derived type ids and the distance. + if (CompactionValues.size() > Type::TypeTyID && + !CompactionTypes.empty() && + type >= Type::FirstDerivedTyID) { + const Type *Ty = CompactionTypes[type-Type::FirstDerivedTyID]; + TypeListTy::iterator I = + find(ModuleTypes.begin(), ModuleTypes.end(), Ty); + assert(I != ModuleTypes.end()); + GlobalTyID = Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } - if (hasImplicitNull(GlobalTyID, hasExplicitPrimitiveZeros)) { + if (hasImplicitNull(GlobalTyID)) { if (Num == 0) - return Constant::getNullValue(getType(type)); + return Constant::getNullValue(getType(type)); --Num; } if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) { if (Num < ModuleValues[GlobalTyID]->size()) - return ModuleValues[GlobalTyID]->getOperand(Num); + return ModuleValues[GlobalTyID]->getOperand(Num); Num -= ModuleValues[GlobalTyID]->size(); } } - if (Values.size() > type && Values[type] && Num < Values[type]->size()) - return Values[type]->getOperand(Num); + if (FunctionValues.size() > type && + FunctionValues[type] && + Num < FunctionValues[type]->size()) + return FunctionValues[type]->getOperand(Num); if (!Create) return 0; // Do not create a placeholder? std::pair KeyValue(type, oNum); - std::map, Value*>::iterator I = - ForwardReferences.lower_bound(KeyValue); + ForwardReferenceMap::iterator I = ForwardReferences.lower_bound(KeyValue); if (I != ForwardReferences.end() && I->first == KeyValue) return I->second; // We have already created this placeholder @@ -162,32 +306,27 @@ Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) { return Val; } -/// getBasicBlock - Get a particular numbered basic block, which might be a -/// forward reference. This works together with ParseBasicBlock to handle these -/// forward references in a clean manner. -/// -BasicBlock *BytecodeParser::getBasicBlock(unsigned ID) { - // Make sure there is room in the table... - if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); - - // First check to see if this is a backwards reference, i.e., ParseBasicBlock - // has already created this block, or if the forward reference has already - // been created. - if (ParsedBasicBlocks[ID]) - return ParsedBasicBlocks[ID]; +Value* BytecodeReader::getGlobalTableValue(const Type *Ty, unsigned SlotNo) { + // FIXME: getTypeSlot is inefficient! + unsigned TyID = getGlobalTableTypeSlot(Ty); + + if (TyID != Type::LabelTyID) { + if (SlotNo == 0) + return Constant::getNullValue(Ty); + --SlotNo; + } - // Otherwise, the basic block has not yet been created. Do so and add it to - // the ParsedBasicBlocks list. - return ParsedBasicBlocks[ID] = new BasicBlock(); + if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 || + SlotNo >= ModuleValues[TyID]->size()) { + PARSE_ERROR("Corrupt compaction table entry!" + << TyID << ", " << SlotNo << ": " << ModuleValues.size() << ", " + << (void*)ModuleValues[TyID] << ", " + << ModuleValues[TyID]->size() << "\n"); + } + return ModuleValues[TyID]->getOperand(SlotNo); } -/// getConstantValue - Just like getValue, except that it returns a null pointer -/// only on error. It always returns a constant (meaning that if the value is -/// defined, but is not a constant, that is an error). If the specified -/// constant hasn't been parsed yet, a placeholder is defined and used. Later, -/// after the real value is parsed, the placeholder is eliminated. -/// -Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { +Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { if (Value *V = getValue(TypeSlot, Slot, false)) if (Constant *C = dyn_cast(V)) return C; // If we already have the value parsed, just return it @@ -196,19 +335,17 @@ Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { // to infest bytecode files. return ConstantPointerRef::get(GV); else - throw std::string("Reference of a value is expected to be a constant!"); + PARSE_ERROR("Reference of a value is expected to be a constant!"); const Type *Ty = getType(TypeSlot); std::pair Key(Ty, Slot); ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key); if (I != ConstantFwdRefs.end() && I->first == Key) { - BCR_TRACE(5, "Previous forward ref found!\n"); return I->second; } else { // Create a placeholder for the constant reference and // keep track of the fact that we have a forward ref to recycle it - BCR_TRACE(5, "Creating new forward ref to a constant!\n"); Constant *C = new ConstPHolder(Ty, Slot); // Keep track of the fact that we have a forward ref to recycle it @@ -217,12 +354,418 @@ Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { } } +//===----------------------------------------------------------------------===// +// IR Construction Methods +//===----------------------------------------------------------------------===// + +unsigned BytecodeReader::insertValue( + Value *Val, unsigned type, ValueTable &ValueTab) { + assert((!isa(Val) || !cast(Val)->isNullValue()) || + !hasImplicitNull(type) && + "Cannot read null values from bytecode!"); + assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); + + if (ValueTab.size() <= type) + ValueTab.resize(type+1); + + if (!ValueTab[type]) ValueTab[type] = new ValueList(); + + ValueTab[type]->push_back(Val); + + bool HasOffset = hasImplicitNull(type); + return ValueTab[type]->size()-1 + HasOffset; +} + +void BytecodeReader::insertArguments(Function* F ) { + const FunctionType *FT = F->getFunctionType(); + Function::aiterator AI = F->abegin(); + for (FunctionType::param_iterator It = FT->param_begin(); + It != FT->param_end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), FunctionValues); +} + +//===----------------------------------------------------------------------===// +// Bytecode Parsing Methods +//===----------------------------------------------------------------------===// + +void BytecodeReader::ParseInstruction(std::vector &Oprnds, + BasicBlock* BB) { + BufPtr SaveAt = At; + + // Clear instruction data + Oprnds.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read_uint(); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Oprnds.resize((Op >> 0) & 03); + + // Extract the operands + switch (Oprnds.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Oprnds[0] = (Op >> 20) & 4095; + if (Oprnds[0] == 4095) // Handle special encoding for 0 operands... + Oprnds.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Oprnds[0] = (Op >> 16) & 255; + Oprnds[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Oprnds[0] = (Op >> 14) & 63; + Oprnds[1] = (Op >> 20) & 63; + Oprnds[2] = (Op >> 26) & 63; + break; + case 0: + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); + Opcode >>= 2; + iType = read_vbr_uint(); + + unsigned NumOprnds = read_vbr_uint(); + Oprnds.resize(NumOprnds); + + if (NumOprnds == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOprnds; ++i) + Oprnds[i] = read_vbr_uint(); + align32(); + break; + } + + // Get the type of the instruction + const Type *InstTy = getType(iType); + + // Hae enough to inform the handler now + HANDLE(Instruction(Opcode, InstTy, Oprnds, At-SaveAt)); + + // Declare the resulting instruction we'll build. + Instruction *Result = 0; + + // Handle binary operators + if (Opcode >= Instruction::BinaryOpsBegin && + Opcode < Instruction::BinaryOpsEnd && Oprnds.size() == 2) + Result = BinaryOperator::create((Instruction::BinaryOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(iType, Oprnds[1])); + + switch (Opcode) { + default: + if (Result == 0) PARSE_ERROR("Illegal instruction read!"); + break; + case Instruction::VAArg: + Result = new VAArgInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::VANext: + Result = new VANextInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::Cast: + Result = new CastInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::Select: + Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]), + getValue(iType, Oprnds[1]), + getValue(iType, Oprnds[2])); + break; + case Instruction::PHI: { + if (Oprnds.size() == 0 || (Oprnds.size() & 1)) + PARSE_ERROR("Invalid phi node encountered!\n"); + + PHINode *PN = new PHINode(InstTy); + PN->op_reserve(Oprnds.size()); + for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2) + PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1])); + Result = PN; + break; + } + + case Instruction::Shl: + case Instruction::Shr: + Result = new ShiftInst((Instruction::OtherOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(Type::UByteTyID, Oprnds[1])); + break; + case Instruction::Ret: + if (Oprnds.size() == 0) + Result = new ReturnInst(); + else if (Oprnds.size() == 1) + Result = new ReturnInst(getValue(iType, Oprnds[0])); + else + PARSE_ERROR("Unrecognized instruction!"); + break; + + case Instruction::Br: + if (Oprnds.size() == 1) + Result = new BranchInst(getBasicBlock(Oprnds[0])); + else if (Oprnds.size() == 3) + Result = new BranchInst(getBasicBlock(Oprnds[0]), + getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); + else + PARSE_ERROR("Invalid number of operands for a 'br' instruction!"); + break; + case Instruction::Switch: { + if (Oprnds.size() & 1) + PARSE_ERROR("Switch statement with odd number of arguments!"); + + SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]), + getBasicBlock(Oprnds[1])); + for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2) + I->addCase(cast(getValue(iType, Oprnds[i])), + getBasicBlock(Oprnds[i+1])); + Result = I; + break; + } + + case Instruction::Call: { + if (Oprnds.size() == 0) + PARSE_ERROR("Invalid call instruction encountered!"); + + Value *F = getValue(iType, Oprnds[0]); + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast(F->getType()); + if (PTy == 0) PARSE_ERROR("Call to non function pointer value!"); + const FunctionType *FTy = dyn_cast(PTy->getElementType()); + if (FTy == 0) PARSE_ERROR("Call to non function pointer value!"); + + std::vector Params; + if (!FTy->isVarArg()) { + FunctionType::param_iterator It = FTy->param_begin(); + + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + PARSE_ERROR("Invalid call instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + PARSE_ERROR("Invalid call instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + unsigned FirstVariableOperand; + if (Oprnds.size() < FTy->getNumParams()) + PARSE_ERROR("Call instruction missing operands!"); + + // Read all of the fixed arguments + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i])); + + FirstVariableOperand = FTy->getNumParams(); + + if ((Oprnds.size()-FirstVariableOperand) & 1) // Must be pairs of type/value + PARSE_ERROR("Invalid call instruction!"); + + for (unsigned i = FirstVariableOperand, e = Oprnds.size(); + i != e; i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new CallInst(F, Params); + break; + } + case Instruction::Invoke: { + if (Oprnds.size() < 3) PARSE_ERROR("Invalid invoke instruction!"); + Value *F = getValue(iType, Oprnds[0]); + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast(F->getType()); + if (PTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + const FunctionType *FTy = dyn_cast(PTy->getElementType()); + if (FTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + + std::vector Params; + BasicBlock *Normal, *Except; + + if (!FTy->isVarArg()) { + Normal = getBasicBlock(Oprnds[1]); + Except = getBasicBlock(Oprnds[2]); + + FunctionType::param_iterator It = FTy->param_begin(); + for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + PARSE_ERROR("Invalid invoke instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + PARSE_ERROR("Invalid invoke instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + Normal = getBasicBlock(Oprnds[0]); + Except = getBasicBlock(Oprnds[1]); + + unsigned FirstVariableArgument = FTy->getNumParams()+2; + for (unsigned i = 2; i != FirstVariableArgument; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)), + Oprnds[i])); + + if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs + PARSE_ERROR("Invalid invoke instruction!"); + + for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new InvokeInst(F, Normal, Except, Params); + break; + } + case Instruction::Malloc: + if (Oprnds.size() > 2) PARSE_ERROR("Invalid malloc instruction!"); + if (!isa(InstTy)) + PARSE_ERROR("Invalid malloc instruction!"); + + Result = new MallocInst(cast(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) : 0); + break; + + case Instruction::Alloca: + if (Oprnds.size() > 2) PARSE_ERROR("Invalid alloca instruction!"); + if (!isa(InstTy)) + PARSE_ERROR("Invalid alloca instruction!"); + + Result = new AllocaInst(cast(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) :0); + break; + case Instruction::Free: + if (!isa(InstTy)) + PARSE_ERROR("Invalid free instruction!"); + Result = new FreeInst(getValue(iType, Oprnds[0])); + break; + case Instruction::GetElementPtr: { + if (Oprnds.size() == 0 || !isa(InstTy)) + PARSE_ERROR("Invalid getelementptr instruction!"); + + std::vector Idx; + + const Type *NextTy = InstTy; + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + const CompositeType *TopTy = dyn_cast_or_null(NextTy); + if (!TopTy) PARSE_ERROR("Invalid getelementptr instruction!"); + + unsigned ValIdx = Oprnds[i]; + unsigned IdxTy = 0; + if (!hasRestrictedGEPTypes) { + // Struct indices are always uints, sequential type indices can be any + // of the 32 or 64-bit integer types. The actual choice of type is + // encoded in the low two bits of the slot number. + if (isa(TopTy)) + IdxTy = Type::UIntTyID; + else { + switch (ValIdx & 3) { + default: + case 0: IdxTy = Type::UIntTyID; break; + case 1: IdxTy = Type::IntTyID; break; + case 2: IdxTy = Type::ULongTyID; break; + case 3: IdxTy = Type::LongTyID; break; + } + ValIdx >>= 2; + } + } else { + IdxTy = isa(TopTy) ? Type::UByteTyID : Type::LongTyID; + } + + Idx.push_back(getValue(IdxTy, ValIdx)); + + // Convert ubyte struct indices into uint struct indices. + if (isa(TopTy) && hasRestrictedGEPTypes) + if (ConstantUInt *C = dyn_cast(Idx.back())) + Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy); + + NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true); + } + + Result = new GetElementPtrInst(getValue(iType, Oprnds[0]), Idx); + break; + } + + case 62: // volatile load + case Instruction::Load: + if (Oprnds.size() != 1 || !isa(InstTy)) + PARSE_ERROR("Invalid load instruction!"); + Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62); + break; + + case 63: // volatile store + case Instruction::Store: { + if (!isa(InstTy) || Oprnds.size() != 2) + PARSE_ERROR("Invalid store instruction!"); + + Value *Ptr = getValue(iType, Oprnds[1]); + const Type *ValTy = cast(Ptr->getType())->getElementType(); + Result = new StoreInst(getValue(getTypeSlot(ValTy), Oprnds[0]), Ptr, + Opcode == 63); + break; + } + case Instruction::Unwind: + if (Oprnds.size() != 0) PARSE_ERROR("Invalid unwind instruction!"); + Result = new UnwindInst(); + break; + } // end switch(Opcode) + + unsigned TypeSlot; + if (Result->getType() == InstTy) + TypeSlot = iType; + else + TypeSlot = getTypeSlot(Result->getType()); + + insertValue(Result, TypeSlot, FunctionValues); + BB->getInstList().push_back(Result); +} + +/// getBasicBlock - Get a particular numbered basic block, which might be a +/// forward reference. This works together with ParseBasicBlock to handle these +/// forward references in a clean manner. +BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { + // Make sure there is room in the table... + if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); + + // First check to see if this is a backwards reference, i.e., ParseBasicBlock + // has already created this block, or if the forward reference has already + // been created. + if (ParsedBasicBlocks[ID]) + return ParsedBasicBlocks[ID]; + + // Otherwise, the basic block has not yet been created. Do so and add it to + // the ParsedBasicBlocks list. + return ParsedBasicBlocks[ID] = new BasicBlock(); +} + /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned BlockNo) { - BasicBlock *BB; +BasicBlock *BytecodeReader::ParseBasicBlock( unsigned BlockNo) { + HANDLE(BasicBlockBegin( BlockNo )); + + BasicBlock *BB = 0; + if (ParsedBasicBlocks.size() == BlockNo) ParsedBasicBlocks.push_back(BB = new BasicBlock()); else if (ParsedBasicBlocks[BlockNo] == 0) @@ -230,24 +773,23 @@ BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf, else BB = ParsedBasicBlocks[BlockNo]; - std::vector Args; - while (Buf < EndBuf) - ParseInstruction(Buf, EndBuf, Args, BB); + std::vector Operands; + while ( moreInBlock() ) + ParseInstruction(Operands, BB); + HANDLE(BasicBlockEnd( BlockNo )); return BB; } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. -unsigned BytecodeParser::ParseInstructionList(Function *F, - const unsigned char *&Buf, - const unsigned char *EndBuf) { +unsigned BytecodeReader::ParseInstructionList(Function* F) { unsigned BlockNo = 0; std::vector Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { + HANDLE(BasicBlockBegin( BlockNo )); BasicBlock *BB; if (ParsedBasicBlocks.size() == BlockNo) ParsedBasicBlocks.push_back(BB = new BasicBlock()); @@ -255,24 +797,25 @@ unsigned BytecodeParser::ParseInstructionList(Function *F, BB = ParsedBasicBlocks[BlockNo] = new BasicBlock(); else BB = ParsedBasicBlocks[BlockNo]; + HANDLE(BasicBlockEnd( BlockNo )); ++BlockNo; F->getBasicBlockList().push_back(BB); // Read instructions into this basic block until we get to a terminator - while (Buf < EndBuf && !BB->getTerminator()) - ParseInstruction(Buf, EndBuf, Args, BB); + while ( moreInBlock() && !BB->getTerminator()) + ParseInstruction(Args, BB); if (!BB->getTerminator()) - throw std::string("Non-terminated basic block found!"); + PARSE_ERROR("Non-terminated basic block found!"); } return BlockNo; } -void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, - const unsigned char *EndBuf, - SymbolTable *ST, - Function *CurrentFunction) { +void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, + SymbolTable *ST) { + HANDLE(SymbolTableBegin(CurrentFunction,ST)); + // Allow efficient basic block lookup by number. std::vector BBMap; if (CurrentFunction) @@ -280,18 +823,16 @@ void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, E = CurrentFunction->end(); I != E; ++I) BBMap.push_back(I); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); - BCR_TRACE(3, "Plane Type: '" << *Ty << "' with " << NumEntries << - " entries\n"); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); Value *V = 0; if (Typ == Type::TypeTyID) @@ -303,76 +844,444 @@ void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, V = getValue(Typ, slot, false); // Find mapping... } if (V == 0) - throw "Failed value look-up for name '" + Name + "'"; - BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << *V; - if (!isa(V)) std::cerr << "\n"); + PARSE_ERROR("Failed value look-up for name '" << Name << "'"); V->setName(Name, ST); } } + checkPastBlockEnd("Symbol Table"); + HANDLE(SymbolTableEnd()); +} + +void BytecodeReader::ParseCompactionTable() { + + HANDLE(CompactionTableBegin()); + + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Ty; + + if ((NumEntries & 3) == 3) { + NumEntries >>= 2; + Ty = read_vbr_uint(); + } else { + Ty = NumEntries >> 2; + NumEntries &= 3; + } - if (Buf > EndBuf) throw std::string("Tried to read past end of buffer."); + if (Ty >= CompactionValues.size()) + CompactionValues.resize(Ty+1); + + if (!CompactionValues[Ty].empty()) + PARSE_ERROR("Compaction table plane contains multiple entries!"); + + HANDLE(CompactionTablePlane( Ty, NumEntries )); + + if (Ty == Type::TypeTyID) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = read_vbr_uint(); + const Type *Typ = getGlobalTableType(TypeSlot); + CompactionTypes.push_back(Typ); + HANDLE(CompactionTableType( i, TypeSlot, Typ )); + } + CompactionTypes.resize(NumEntries+Type::FirstDerivedTyID); + } else { + const Type *Typ = getType(Ty); + // Push the implicit zero + CompactionValues[Ty].push_back(Constant::getNullValue(Typ)); + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned ValSlot = read_vbr_uint(); + Value *V = getGlobalTableValue(Typ, ValSlot); + CompactionValues[Ty].push_back(V); + HANDLE(CompactionTableValue( i, Ty, ValSlot, Typ )); + } + } + } + HANDLE(CompactionTableEnd()); +} + +// Parse a single type constant. +const Type *BytecodeReader::ParseTypeConstant() { + unsigned PrimType = read_vbr_uint(); + + const Type *Result = 0; + if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType))) + return Result; + + switch (PrimType) { + case Type::FunctionTyID: { + const Type *RetType = getType(read_vbr_uint()); + + unsigned NumParams = read_vbr_uint(); + + std::vector Params; + while (NumParams--) + Params.push_back(getType(read_vbr_uint())); + + bool isVarArg = Params.size() && Params.back() == Type::VoidTy; + if (isVarArg) Params.pop_back(); + + Result = FunctionType::get(RetType, Params, isVarArg); + break; + } + case Type::ArrayTyID: { + unsigned ElTyp = read_vbr_uint(); + const Type *ElementType = getType(ElTyp); + + unsigned NumElements = read_vbr_uint(); + + Result = ArrayType::get(ElementType, NumElements); + break; + } + case Type::StructTyID: { + std::vector Elements; + unsigned Typ = read_vbr_uint(); + while (Typ) { // List is terminated by void/0 typeid + Elements.push_back(getType(Typ)); + Typ = read_vbr_uint(); + } + + Result = StructType::get(Elements); + break; + } + case Type::PointerTyID: { + unsigned ElTyp = read_vbr_uint(); + Result = PointerType::get(getType(ElTyp)); + break; + } + + case Type::OpaqueTyID: { + Result = OpaqueType::get(); + break; + } + + default: + PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n"); + break; + } + HANDLE(Type( Result )); + return Result; +} + +// ParseTypeConstants - We have to use this weird code to handle recursive +// types. We know that recursive types will only reference the current slab of +// values in the type plane, but they can forward reference types before they +// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might +// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix +// this ugly problem, we pessimistically insert an opaque type for each type we +// are about to read. This means that forward references will resolve to +// something and when we reread the type later, we can replace the opaque type +// with a new resolved concrete type. +// +void BytecodeReader::ParseTypeConstants(TypeListTy &Tab, unsigned NumEntries){ + assert(Tab.size() == 0 && "should not have read type constants in before!"); + + // Insert a bunch of opaque types to be resolved later... + Tab.reserve(NumEntries); + for (unsigned i = 0; i != NumEntries; ++i) + Tab.push_back(OpaqueType::get()); + + // Loop through reading all of the types. Forward types will make use of the + // opaque types just inserted. + // + for (unsigned i = 0; i != NumEntries; ++i) { + const Type *NewTy = ParseTypeConstant(), *OldTy = Tab[i].get(); + if (NewTy == 0) PARSE_ERROR("Couldn't parse type!"); + + // Don't directly push the new type on the Tab. Instead we want to replace + // the opaque type we previously inserted with the new concrete value. This + // approach helps with forward references to types. The refinement from the + // abstract (opaque) type to the new type causes all uses of the abstract + // type to use the concrete type (NewTy). This will also cause the opaque + // type to be deleted. + cast(const_cast(OldTy))->refineAbstractTypeTo(NewTy); + + // This should have replaced the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the system. + // Let's just make sure it did. + assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); + } +} + +Constant *BytecodeReader::ParseConstantValue( unsigned TypeID) { + // We must check for a ConstantExpr before switching by type because + // a ConstantExpr can be of any type, and has no explicit value. + // + // 0 if not expr; numArgs if is expr + unsigned isExprNumArgs = read_vbr_uint(); + + if (isExprNumArgs) { + // FIXME: Encoding of constant exprs could be much more compact! + std::vector ArgVec; + ArgVec.reserve(isExprNumArgs); + unsigned Opcode = read_vbr_uint(); + + // Read the slot number and types of each of the arguments + for (unsigned i = 0; i != isExprNumArgs; ++i) { + unsigned ArgValSlot = read_vbr_uint(); + unsigned ArgTypeSlot = read_vbr_uint(); + + // Get the arg value from its slot if it exists, otherwise a placeholder + ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot)); + } + + // Construct a ConstantExpr of the appropriate kind + if (isExprNumArgs == 1) { // All one-operand expressions + assert(Opcode == Instruction::Cast); + Constant* Result = ConstantExpr::getCast(ArgVec[0], getType(TypeID)); + HANDLE(ConstantExpression(Opcode, ArgVec, Result)); + return Result; + } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr + std::vector IdxList(ArgVec.begin()+1, ArgVec.end()); + + if (hasRestrictedGEPTypes) { + const Type *BaseTy = ArgVec[0]->getType(); + generic_gep_type_iterator::iterator> + GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()), + E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end()); + for (unsigned i = 0; GTI != E; ++GTI, ++i) + if (isa(*GTI)) { + if (IdxList[i]->getType() != Type::UByteTy) + PARSE_ERROR("Invalid index for getelementptr!"); + IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy); + } + } + + Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList); + HANDLE(ConstantExpression(Opcode, ArgVec, Result)); + return Result; + } else if (Opcode == Instruction::Select) { + assert(ArgVec.size() == 3); + Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1], + ArgVec[2]); + HANDLE(ConstantExpression(Opcode, ArgVec, Result)); + return Result; + } else { // All other 2-operand expressions + Constant* Result = ConstantExpr::get(Opcode, ArgVec[0], ArgVec[1]); + HANDLE(ConstantExpression(Opcode, ArgVec, Result)); + return Result; + } + } + + // Ok, not an ConstantExpr. We now know how to read the given type... + const Type *Ty = getType(TypeID); + switch (Ty->getTypeID()) { + case Type::BoolTyID: { + unsigned Val = read_vbr_uint(); + if (Val != 0 && Val != 1) + PARSE_ERROR("Invalid boolean value read."); + Constant* Result = ConstantBool::get(Val == 1); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val = read_vbr_uint(); + if (!ConstantUInt::isValueValidForType(Ty, Val)) + PARSE_ERROR("Invalid unsigned byte/short/int read."); + Constant* Result = ConstantUInt::get(Ty, Val); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::ULongTyID: { + Constant* Result = ConstantUInt::get(Ty, read_vbr_uint64()); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::SByteTyID: // Signed integer types... + case Type::ShortTyID: + case Type::IntTyID: { + case Type::LongTyID: + int64_t Val = read_vbr_int64(); + if (!ConstantSInt::isValueValidForType(Ty, Val)) + PARSE_ERROR("Invalid signed byte/short/int/long read."); + Constant* Result = ConstantSInt::get(Ty, Val); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::FloatTyID: { + float F; + read_data(&F, &F+1); + Constant* Result = ConstantFP::get(Ty, F); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::DoubleTyID: { + double Val; + read_data(&Val, &Val+1); + Constant* Result = ConstantFP::get(Ty, Val); + HANDLE(ConstantValue(Result)); + return Result; + } + + case Type::TypeTyID: + PARSE_ERROR("Type constants shouldn't live in constant table!"); + break; + + case Type::ArrayTyID: { + const ArrayType *AT = cast(Ty); + unsigned NumElements = AT->getNumElements(); + unsigned TypeSlot = getTypeSlot(AT->getElementType()); + std::vector Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(getConstantValue(TypeSlot, + read_vbr_uint())); + Constant* Result = ConstantArray::get(AT, Elements); + HANDLE(ConstantArray(AT, Elements, TypeSlot, Result)); + return Result; + } + + case Type::StructTyID: { + const StructType *ST = cast(Ty); + + std::vector Elements; + Elements.reserve(ST->getNumElements()); + for (unsigned i = 0; i != ST->getNumElements(); ++i) + Elements.push_back(getConstantValue(ST->getElementType(i), + read_vbr_uint())); + + Constant* Result = ConstantStruct::get(ST, Elements); + HANDLE(ConstantStruct(ST, Elements, Result)); + return Result; + } + + case Type::PointerTyID: { // ConstantPointerRef value... + const PointerType *PT = cast(Ty); + unsigned Slot = read_vbr_uint(); + + // Check to see if we have already read this global variable... + Value *Val = getValue(TypeID, Slot, false); + GlobalValue *GV; + if (Val) { + if (!(GV = dyn_cast(Val))) + PARSE_ERROR("Value of ConstantPointerRef not in ValueTable!"); + } else { + PARSE_ERROR("Forward references are not allowed here."); + } + + Constant* Result = ConstantPointerRef::get(GV); + HANDLE(ConstantPointer(PT, Slot, GV, Result)); + return Result; + } + + default: + PARSE_ERROR("Don't know how to deserialize constant value of type '"+ + Ty->getDescription()); + break; + } } -void BytecodeParser::ResolveReferencesToConstant(Constant *NewV, unsigned Slot){ +void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Slot){ ConstantRefsType::iterator I = ConstantFwdRefs.find(std::make_pair(NewV->getType(), Slot)); if (I == ConstantFwdRefs.end()) return; // Never forward referenced? - BCR_TRACE(3, "Mutating forward refs!\n"); Value *PH = I->second; // Get the placeholder... PH->replaceAllUsesWith(NewV); delete PH; // Delete the old placeholder ConstantFwdRefs.erase(I); // Remove the map entry for it } -void BytecodeParser::ParseFunction(const unsigned char *&Buf, - const unsigned char *EndBuf) { - if (FunctionSignatureList.empty()) - throw std::string("FunctionSignatureList empty!"); - - Function *F = FunctionSignatureList.back(); - FunctionSignatureList.pop_back(); - - // Save the information for future reading of the function - LazyFunctionLoadMap[F] = LazyFunctionInfo(Buf, EndBuf); - // Pretend we've `parsed' this function - Buf = EndBuf; +void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){ + for (; NumEntries; --NumEntries) { + unsigned Typ = read_vbr_uint(); + const Type *Ty = getType(Typ); + if (!isa(Ty)) + PARSE_ERROR("String constant data invalid!"); + + const ArrayType *ATy = cast(Ty); + if (ATy->getElementType() != Type::SByteTy && + ATy->getElementType() != Type::UByteTy) + PARSE_ERROR("String constant data invalid!"); + + // Read character data. The type tells us how long the string is. + char Data[ATy->getNumElements()]; + read_data(Data, Data+ATy->getNumElements()); + + std::vector Elements(ATy->getNumElements()); + if (ATy->getElementType() == Type::SByteTy) + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); + else + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); + + // Create the constant, inserting it as needed. + Constant *C = ConstantArray::get(ATy, Elements); + unsigned Slot = insertValue(C, Typ, Tab); + ResolveReferencesToConstant(C, Slot); + HANDLE(ConstantString(cast(C))); + } } -void BytecodeParser::materializeFunction(Function* F) { - // Find {start, end} pointers and slot in the map. If not there, we're done. - std::map::iterator Fi = - LazyFunctionLoadMap.find(F); - if (Fi == LazyFunctionLoadMap.end()) return; +void BytecodeReader::ParseConstantPool(ValueTable &Tab, + TypeListTy &TypeTab) { + HANDLE(GlobalConstantsBegin()); + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); + if (Typ == Type::TypeTyID) { + ParseTypeConstants(TypeTab, NumEntries); + } else if (Typ == Type::VoidTyID) { + assert(&Tab == &ModuleValues && "Cannot read strings in functions!"); + ParseStringConstants(NumEntries, Tab); + } else { + for (unsigned i = 0; i < NumEntries; ++i) { + Constant *C = ParseConstantValue(Typ); + assert(C && "ParseConstantValue returned NULL!"); + unsigned Slot = insertValue(C, Typ, Tab); + + // If we are reading a function constant table, make sure that we adjust + // the slot number to be the real global constant number. + // + if (&Tab != &ModuleValues && Typ < ModuleValues.size() && + ModuleValues[Typ]) + Slot += ModuleValues[Typ]->size(); + ResolveReferencesToConstant(C, Slot); + } + } + } + checkPastBlockEnd("Constant Pool"); + HANDLE(GlobalConstantsEnd()); +} - const unsigned char *Buf = Fi->second.Buf; - const unsigned char *EndBuf = Fi->second.EndBuf; - LazyFunctionLoadMap.erase(Fi); +void BytecodeReader::ParseFunctionBody(Function* F ) { + unsigned FuncSize = BlockEnd - At; GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; - unsigned LinkageType = read_vbr_uint(Buf, EndBuf); - if (LinkageType > 4) - throw std::string("Invalid linkage type for Function."); + unsigned LinkageType = read_vbr_uint(); switch (LinkageType) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; case 2: Linkage = GlobalValue::AppendingLinkage; break; case 3: Linkage = GlobalValue::InternalLinkage; break; case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Invalid linkage type for Function."); + Linkage = GlobalValue::InternalLinkage; + break; } - F->setLinkage(Linkage); + F->setLinkage( Linkage ); + HANDLE(FunctionBegin(F,FuncSize)); // Keep track of how many basic blocks we have read in... unsigned BlockNum = 0; bool InsertedArguments = false; - while (Buf < EndBuf) { + BufPtr MyEnd = BlockEnd; + while ( At < MyEnd ) { unsigned Type, Size; - const unsigned char *OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr OldAt = At; + read_block(Type, Size); switch (Type) { case BytecodeFormat::ConstantPool: @@ -380,21 +1289,15 @@ void BytecodeParser::materializeFunction(Function* F) { // Insert arguments into the value table before we parse the first basic // block in the function, but after we potentially read in the // compaction table. - const FunctionType *FT = F->getFunctionType(); - Function::aiterator AI = F->abegin(); - for (FunctionType::param_iterator It = FT->param_begin(); - It != FT->param_end(); ++It, ++AI) - insertValue(AI, getTypeSlot(AI->getType()), Values); + insertArguments(F); InsertedArguments = true; } - BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n"); - ParseConstantPool(Buf, Buf+Size, Values, FunctionTypeValues); + ParseConstantPool(FunctionValues, FunctionTypes); break; case BytecodeFormat::CompactionTable: - BCR_TRACE(2, "BLOCK BytecodeFormat::CompactionTable: {\n"); - ParseCompactionTable(Buf, Buf+Size); + ParseCompactionTable(); break; case BytecodeFormat::BasicBlock: { @@ -402,16 +1305,11 @@ void BytecodeParser::materializeFunction(Function* F) { // Insert arguments into the value table before we parse the first basic // block in the function, but after we potentially read in the // compaction table. - const FunctionType *FT = F->getFunctionType(); - Function::aiterator AI = F->abegin(); - for (FunctionType::param_iterator It = FT->param_begin(); - It != FT->param_end(); ++It, ++AI) - insertValue(AI, getTypeSlot(AI->getType()), Values); + insertArguments(F); InsertedArguments = true; } - BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n"); - BasicBlock *BB = ParseBasicBlock(Buf, Buf+Size, BlockNum++); + BasicBlock *BB = ParseBasicBlock(BlockNum++); F->getBasicBlockList().push_back(BB); break; } @@ -421,41 +1319,36 @@ void BytecodeParser::materializeFunction(Function* F) { // list for the function, but after we potentially read in the compaction // table. if (!InsertedArguments) { - const FunctionType *FT = F->getFunctionType(); - Function::aiterator AI = F->abegin(); - for (FunctionType::param_iterator It = FT->param_begin(); - It != FT->param_end(); ++It, ++AI) - insertValue(AI, getTypeSlot(AI->getType()), Values); + insertArguments(F); InsertedArguments = true; } - BCR_TRACE(2, "BLOCK BytecodeFormat::InstructionList: {\n"); - if (BlockNum) throw std::string("Already parsed basic blocks!"); - BlockNum = ParseInstructionList(F, Buf, Buf+Size); + if (BlockNum) + PARSE_ERROR("Already parsed basic blocks!"); + BlockNum = ParseInstructionList(F); break; } case BytecodeFormat::SymbolTable: - BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n"); - ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable(), F); + ParseSymbolTable(F, &F->getSymbolTable()); break; default: - BCR_TRACE(2, "BLOCK :ignored! {\n"); - Buf += Size; - if (OldBuf > Buf) - throw std::string("Wrapped around reading bytecode."); + At += Size; + if (OldAt > At) + PARSE_ERROR("Wrapped around reading bytecode."); break; } - BCR_TRACE(2, "} end block\n"); + BlockEnd = MyEnd; // Malformed bc file if read past end of block. - align32(Buf, EndBuf); + align32(); } // Make sure there were no references to non-existant basic blocks. if (BlockNum != ParsedBasicBlocks.size()) - throw std::string("Illegal basic block operand reference"); + PARSE_ERROR("Illegal basic block operand reference"); + ParsedBasicBlocks.clear(); // Resolve forward references. Replace any uses of a forward reference value @@ -496,128 +1389,152 @@ void BytecodeParser::materializeFunction(Function* F) { } // Clear out function-level types... - FunctionTypeValues.clear(); - CompactionTable.clear(); - freeTable(Values); + FunctionTypes.clear(); + CompactionTypes.clear(); + CompactionValues.clear(); + freeTable(FunctionValues); + + HANDLE(FunctionEnd(F)); } -void BytecodeParser::ParseCompactionTable(const unsigned char *&Buf, - const unsigned char *End) { +void BytecodeReader::ParseFunctionLazily() { + if (FunctionSignatureList.empty()) + PARSE_ERROR("FunctionSignatureList empty!"); - while (Buf != End) { - unsigned NumEntries = read_vbr_uint(Buf, End); - unsigned Ty; + Function *Func = FunctionSignatureList.back(); + FunctionSignatureList.pop_back(); - if ((NumEntries & 3) == 3) { - NumEntries >>= 2; - Ty = read_vbr_uint(Buf, End); - } else { - Ty = NumEntries >> 2; - NumEntries &= 3; - } + // Save the information for future reading of the function + LazyFunctionLoadMap[Func] = LazyFunctionInfo(BlockStart, BlockEnd); - if (Ty >= CompactionTable.size()) - CompactionTable.resize(Ty+1); + // Pretend we've `parsed' this function + At = BlockEnd; +} - if (!CompactionTable[Ty].empty()) - throw std::string("Compaction table plane contains multiple entries!"); - - if (Ty == Type::TypeTyID) { - for (unsigned i = 0; i != NumEntries; ++i) { - const Type *Typ = getGlobalTableType(read_vbr_uint(Buf, End)); - CompactionTable[Type::TypeTyID].push_back(const_cast(Typ)); - } +void BytecodeReader::ParseFunction(Function* Func) { + // Find {start, end} pointers and slot in the map. If not there, we're done. + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(Func); - CompactionTable.resize(NumEntries+Type::FirstDerivedTyID); - } else { - const Type *Typ = getType(Ty); - // Push the implicit zero - CompactionTable[Ty].push_back(Constant::getNullValue(Typ)); - for (unsigned i = 0; i != NumEntries; ++i) { - Value *V = getGlobalTableValue(Typ, read_vbr_uint(Buf, End)); - CompactionTable[Ty].push_back(V); - } - } + // Make sure we found it + if ( Fi == LazyFunctionLoadMap.end() ) { + PARSE_ERROR("Unrecognized function of type " << Func->getType()->getDescription()); + return; } + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + assert(Fi->first == Func); + + LazyFunctionLoadMap.erase(Fi); + + this->ParseFunctionBody( Func ); } +void BytecodeReader::ParseAllFunctionBodies() { + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); + LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); + + while ( Fi != Fe ) { + Function* Func = Fi->first; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + this->ParseFunctionBody(Func); + ++Fi; + } +} +void BytecodeReader::ParseGlobalTypes() { + ValueTable T; + ParseConstantPool(T, ModuleTypes); +} -void BytecodeParser::ParseModuleGlobalInfo(const unsigned char *&Buf, - const unsigned char *End) { - if (!FunctionSignatureList.empty()) - throw std::string("Two ModuleGlobalInfo packets found!"); +void BytecodeReader::ParseModuleGlobalInfo() { + + HANDLE(ModuleGlobalsBegin()); // Read global variables... - unsigned VarType = read_vbr_uint(Buf, End); + unsigned VarType = read_vbr_uint(); while (VarType != Type::VoidTyID) { // List is terminated by Void // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = // Linkage, bit4+ = slot# unsigned SlotNo = VarType >> 5; unsigned LinkageID = (VarType >> 2) & 7; + bool isConstant = VarType & 1; + bool hasInitializer = VarType & 2; GlobalValue::LinkageTypes Linkage; switch (LinkageID) { - default: assert(0 && "Unknown linkage type!"); case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; case 2: Linkage = GlobalValue::AppendingLinkage; break; case 3: Linkage = GlobalValue::InternalLinkage; break; case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Unknown linkage type: " << LinkageID); + Linkage = GlobalValue::InternalLinkage; + break; } const Type *Ty = getType(SlotNo); - if (!isa(Ty)) - throw std::string("Global not pointer type! Ty = " + - Ty->getDescription()); + if ( !Ty ) { + PARSE_ERROR("Global has no type! SlotNo=" << SlotNo); + } + + if ( !isa(Ty)) { + PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription()); + } const Type *ElTy = cast(Ty)->getElementType(); // Create the global variable... - GlobalVariable *GV = new GlobalVariable(ElTy, VarType & 1, Linkage, + GlobalVariable *GV = new GlobalVariable(ElTy, isConstant, Linkage, 0, "", TheModule); - BCR_TRACE(2, "Global Variable of type: " << *Ty << "\n"); insertValue(GV, SlotNo, ModuleValues); - if (VarType & 2) // Does it have an initializer? - GlobalInits.push_back(std::make_pair(GV, read_vbr_uint(Buf, End))); - VarType = read_vbr_uint(Buf, End); + unsigned initSlot = 0; + if (hasInitializer) { + initSlot = read_vbr_uint(); + GlobalInits.push_back(std::make_pair(GV, initSlot)); + } + + // Notify handler about the global value. + HANDLE(GlobalVariable( ElTy, isConstant, Linkage, SlotNo, initSlot )); + + // Get next item + VarType = read_vbr_uint(); } // Read the function objects for all of the functions that are coming - unsigned FnSignature = read_vbr_uint(Buf, End); + unsigned FnSignature = read_vbr_uint(); while (FnSignature != Type::VoidTyID) { // List is terminated by Void const Type *Ty = getType(FnSignature); if (!isa(Ty) || - !isa(cast(Ty)->getElementType())) - throw std::string("Function not ptr to func type! Ty = " + + !isa(cast(Ty)->getElementType())) { + PARSE_ERROR( "Function not a pointer to function type! Ty = " + Ty->getDescription()); + // FIXME: what should Ty be if handler continues? + } // We create functions by passing the underlying FunctionType to create... - Ty = cast(Ty)->getElementType(); - - // When the ModuleGlobalInfo section is read, we load the type of each - // function and the 'ModuleValues' slot that it lands in. We then load a - // placeholder into its slot to reserve it. When the function is loaded, - // this placeholder is replaced. + const FunctionType* FTy = + cast(cast(Ty)->getElementType()); - // Insert the placeholder... - Function *Func = new Function(cast(Ty), - GlobalValue::InternalLinkage, "", TheModule); + // Insert the place hodler + Function* Func = new Function(FTy, GlobalValue::InternalLinkage, + "", TheModule); insertValue(Func, FnSignature, ModuleValues); - // Keep track of this information in a list that is emptied as functions are - // loaded... - // + // Save this for later so we know type of lazily instantiated functions FunctionSignatureList.push_back(Func); - FnSignature = read_vbr_uint(Buf, End); - BCR_TRACE(2, "Function of type: " << Ty << "\n"); + HANDLE(FunctionDeclaration(Func)); + + // Get Next function signature + FnSignature = read_vbr_uint(); } if (hasInconsistentModuleGlobalInfo) - align32(Buf, End); + align32(); // Now that the function signature list is set up, reverse it so that we can // remove elements efficiently from the back of the vector. @@ -626,12 +1543,13 @@ void BytecodeParser::ParseModuleGlobalInfo(const unsigned char *&Buf, // This is for future proofing... in the future extra fields may be added that // we don't understand, so we transparently ignore them. // - Buf = End; + At = BlockEnd; + + HANDLE(ModuleGlobalsEnd()); } -void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned Version = read_vbr_uint(Buf, EndBuf); +void BytecodeReader::ParseVersionInfo() { + unsigned Version = read_vbr_uint(); // Unpack version number: low four bits are for flags, top bits = version Module::Endianness Endianness; @@ -671,71 +1589,69 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, break; default: - throw std::string("Unknown bytecode version number!"); + PARSE_ERROR("Unknown bytecode version number: " << RevisionNum); } if (hasNoEndianness) Endianness = Module::AnyEndianness; if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; - TheModule->setEndianness(Endianness); - TheModule->setPointerSize(PointerSize); - BCR_TRACE(1, "Bytecode Rev = " << (unsigned)RevisionNum << "\n"); - BCR_TRACE(1, "Endianness/PointerSize = " << Endianness << "," - << PointerSize << "\n"); + HANDLE(VersionInfo(RevisionNum, Endianness, PointerSize )); } -void BytecodeParser::ParseModule(const unsigned char *Buf, - const unsigned char *EndBuf) { +void BytecodeReader::ParseModule() { unsigned Type, Size; - readBlock(Buf, EndBuf, Type, Size); - if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) - throw std::string("Expected Module packet! B: "+ - utostr((unsigned)(intptr_t)Buf) + ", S: "+utostr(Size)+ - " E: "+utostr((unsigned)(intptr_t)EndBuf)); // Hrm, not a class? - BCR_TRACE(0, "BLOCK BytecodeFormat::Module: {\n"); - FunctionSignatureList.clear(); // Just in case... + FunctionSignatureList.clear(); // Just in case... // Read into instance variables... - ParseVersionInfo(Buf, EndBuf); - align32(Buf, EndBuf); + ParseVersionInfo(); + align32(); /// FIXME: Is this redundant? VI is first and 4 bytes! + + bool SeenModuleGlobalInfo = false; + bool SeenGlobalTypePlane = false; + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + BufPtr OldAt = At; + read_block(Type, Size); - while (Buf < EndBuf) { - const unsigned char *OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); switch (Type) { + case BytecodeFormat::GlobalTypePlane: - BCR_TRACE(1, "BLOCK BytecodeFormat::GlobalTypePlane: {\n"); - ParseGlobalTypes(Buf, Buf+Size); + if ( SeenGlobalTypePlane ) + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + + ParseGlobalTypes(); + SeenGlobalTypePlane = true; break; - case BytecodeFormat::ModuleGlobalInfo: - BCR_TRACE(1, "BLOCK BytecodeFormat::ModuleGlobalInfo: {\n"); - ParseModuleGlobalInfo(Buf, Buf+Size); + case BytecodeFormat::ModuleGlobalInfo: + if ( SeenModuleGlobalInfo ) + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(); + SeenModuleGlobalInfo = true; break; case BytecodeFormat::ConstantPool: - BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n"); - ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues); + ParseConstantPool(ModuleValues, ModuleTypes); break; - case BytecodeFormat::Function: { - BCR_TRACE(1, "BLOCK BytecodeFormat::Function: {\n"); - ParseFunction(Buf, Buf+Size); + case BytecodeFormat::Function: + ParseFunctionLazily(); break; - } case BytecodeFormat::SymbolTable: - BCR_TRACE(1, "BLOCK BytecodeFormat::SymbolTable: {\n"); - ParseSymbolTable(Buf, Buf+Size, &TheModule->getSymbolTable(), 0); + ParseSymbolTable(0, &TheModule->getSymbolTable()); break; + default: - Buf += Size; - if (OldBuf > Buf) throw std::string("Expected Module Block!"); + At += Size; + if (OldAt > At) { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + } break; } - BCR_TRACE(1, "} end block\n"); - align32(Buf, EndBuf); + BlockEnd = MyEnd; + align32(); } // After the module constant pool has been read, we can safely initialize @@ -747,38 +1663,151 @@ void BytecodeParser::ParseModule(const unsigned char *Buf, // Look up the initializer value... // FIXME: Preserve this type ID! - unsigned TypeSlot = getTypeSlot(GV->getType()->getElementType()); + + const llvm::PointerType* GVType = GV->getType(); + unsigned TypeSlot = getTypeSlot(GVType->getElementType()); if (Constant *CV = getConstantValue(TypeSlot, Slot)) { if (GV->hasInitializer()) - throw std::string("Global *already* has an initializer?!"); + PARSE_ERROR("Global *already* has an initializer?!"); + HANDLE(GlobalInitializer(GV,CV)); GV->setInitializer(CV); } else - throw std::string("Cannot find initializer value."); + PARSE_ERROR("Cannot find initializer value."); } + /// Make sure we pulled them all out. If we didn't then there's a declaration + /// but a missing body. That's not allowed. if (!FunctionSignatureList.empty()) - throw std::string("Function expected, but bytecode stream ended!"); - - BCR_TRACE(0, "} end block\n\n"); + PARSE_ERROR( + "Function declared, but bytecode stream ended before definition"); } -void BytecodeParser::ParseBytecode(const unsigned char *Buf, unsigned Length, - const std::string &ModuleID) { +void BytecodeReader::ParseBytecode( + BufPtr Buf, unsigned Length, + const std::string &ModuleID) { + + try { + At = MemStart = BlockStart = Buf; + MemEnd = BlockEnd = Buf + Length; + + // Create the module + TheModule = new Module(ModuleID); + + HANDLE(Start(TheModule, Length)); + + // Read and check signature... + unsigned Sig = read_uint(); + if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + PARSE_ERROR("Invalid bytecode signature: " << Sig); + } + + + // Tell the handler we're starting a module + HANDLE(ModuleBegin(ModuleID)); + + // Get the module block and size and verify + unsigned Type, Size; + read_block(Type, Size); + if ( Type != BytecodeFormat::Module ) { + PARSE_ERROR("Expected Module Block! At: " << unsigned(intptr_t(At)) + << ", Type:" << Type << ", Size:" << Size); + } + if ( At + Size != MemEnd ) { + PARSE_ERROR("Invalid Top Level Block Length! At: " + << unsigned(intptr_t(At)) << ", Type:" << Type << ", Size:" << Size); + } - unsigned char *EndBuf = (unsigned char*)(Buf + Length); + // Parse the module contents + this->ParseModule(); - // Read and check signature... - unsigned Sig = read(Buf, EndBuf); - if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) - throw std::string("Invalid bytecode signature!"); + // Tell the handler we're done + HANDLE(ModuleEnd(ModuleID)); - TheModule = new Module(ModuleID); - try { - ParseModule(Buf, EndBuf); - } catch (std::string &Error) { - freeState(); // Must destroy handles before deleting module! + // Check for missing functions + if ( hasFunctions() ) + PARSE_ERROR("Function expected, but bytecode stream ended!"); + + // Tell the handler we're + HANDLE(Finish()); + + } catch (std::string& errstr ) { + HANDLE(Error(errstr)); + freeState(); delete TheModule; TheModule = 0; throw; + } catch (...) { + std::string msg("Unknown Exception Occurred"); + HANDLE(Error(msg)); + freeState(); + delete TheModule; + TheModule = 0; + throw msg; } } + +//===----------------------------------------------------------------------===// +//=== Default Implementations of Handler Methods +//===----------------------------------------------------------------------===// + +BytecodeHandler::~BytecodeHandler() {} +void BytecodeHandler::handleError(const std::string& str ) { } +void BytecodeHandler::handleStart(Module*m, unsigned Length ) { } +void BytecodeHandler::handleFinish() { } +void BytecodeHandler::handleModuleBegin(const std::string& id) { } +void BytecodeHandler::handleModuleEnd(const std::string& id) { } +void BytecodeHandler::handleVersionInfo( unsigned char RevisionNum, + Module::Endianness Endianness, Module::PointerSize PointerSize) { } +void BytecodeHandler::handleModuleGlobalsBegin() { } +void BytecodeHandler::handleGlobalVariable( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes, + unsigned SlotNo, unsigned initSlot ) { } +void BytecodeHandler::handleType( const Type* Ty ) {} +void BytecodeHandler::handleFunctionDeclaration( + Function* Func ) {} +void BytecodeHandler::handleGlobalInitializer(GlobalVariable*, Constant* ) {} +void BytecodeHandler::handleModuleGlobalsEnd() { } +void BytecodeHandler::handleCompactionTableBegin() { } +void BytecodeHandler::handleCompactionTablePlane( + unsigned Ty, unsigned NumEntries) {} +void BytecodeHandler::handleCompactionTableType( + unsigned i, unsigned TypSlot, const Type* ) {} +void BytecodeHandler::handleCompactionTableValue( + unsigned i, unsigned TypSlot, unsigned ValSlot, const Type* ) {} +void BytecodeHandler::handleCompactionTableEnd() { } +void BytecodeHandler::handleSymbolTableBegin(Function*, SymbolTable*) { } +void BytecodeHandler::handleSymbolTablePlane( unsigned Ty, unsigned NumEntries, + const Type* Typ) { } +void BytecodeHandler::handleSymbolTableType( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableValue( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableEnd() { } +void BytecodeHandler::handleFunctionBegin( Function* Func, + unsigned Size ) {} +void BytecodeHandler::handleFunctionEnd( Function* Func) { } +void BytecodeHandler::handleBasicBlockBegin( unsigned blocknum) { } +bool BytecodeHandler::handleInstruction( unsigned Opcode, const Type* iType, + std::vector& Operands, unsigned Size) { + return Instruction::isTerminator(Opcode); + } +void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) { } +void BytecodeHandler::handleGlobalConstantsBegin() { } +void BytecodeHandler::handleConstantExpression( unsigned Opcode, + std::vector ArgVec, Constant* Val) { } +void BytecodeHandler::handleConstantValue( Constant * c ) { } +void BytecodeHandler::handleConstantArray( const ArrayType* AT, + std::vector& Elements, unsigned TypeSlot, Constant* Val ) { } +void BytecodeHandler::handleConstantStruct( const StructType* ST, + std::vector& ElementSlots, Constant* Val ) { } +void BytecodeHandler::handleConstantPointer( + const PointerType* PT, unsigned TypeSlot, GlobalValue*, Constant* Val) { } +void BytecodeHandler::handleConstantString( const ConstantArray* CA ) {} +void BytecodeHandler::handleGlobalConstantsEnd() {} +void BytecodeHandler::handleAlignment(unsigned numBytes) {} +void BytecodeHandler::handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) {} +void BytecodeHandler::handleVBR32(unsigned Size ) {} +void BytecodeHandler::handleVBR64(unsigned Size ) {} + +// vim: sw=2