1 //===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Reid Spencer and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This header file defines the interface to the Bytecode Parser and the
11 // Bytecode Handler interface that it calls.
13 //===----------------------------------------------------------------------===//
15 #ifndef BYTECODE_PARSER_H
16 #define BYTECODE_PARSER_H
18 #include "llvm/Constants.h"
19 #include "llvm/DerivedTypes.h"
20 #include "llvm/GlobalValue.h"
21 #include "llvm/Module.h"
28 class BytecodeHandler; ///< Forward declare the handler interface
30 /// This class defines the interface for parsing a buffer of bytecode. The
31 /// parser itself takes no action except to call the various functions of
32 /// the handler interface. The parser's sole responsibility is the correct
33 /// interpretation of the bytecode buffer. The handler is responsible for
34 /// instantiating and keeping track of all values. As a convenience, the parser
35 /// is responsible for materializing types and will pass them through the
36 /// handler interface as necessary.
37 /// @see BytecodeHandler
38 /// @brief Abstract Bytecode Parser interface
39 class AbstractBytecodeParser {
41 /// @name Constructors
/// @brief Construct a parser and record which optional events it reports.
/// NOTE(review): only part of the parameter list and body is visible here.
44 AbstractBytecodeParser(
46 bool repAlignment = false, ///< Initial value for reportAlignment
47 bool repBlocks = false, ///< Initial value for reportBlocks
51 reportAlignment = repAlignment;
52 reportBlocks = repBlocks;
/// @brief Destructor (empty)
56 ~AbstractBytecodeParser() { }
62 /// @brief A convenience type for the buffer pointer
63 typedef const unsigned char* BufPtr;
65 /// @brief The type used for vector of potentially abstract types
66 typedef std::vector<PATypeHolder> TypeListTy;
75 /// @brief Main interface to parsing a bytecode buffer.
/// @param Buf Pointer to the bytecode bytes to parse
/// @param Length Presumably the number of bytes available at Buf -- confirm
/// @param ModuleID Presumably the identifier of the module being parsed -- confirm
76 void ParseBytecode(const unsigned char *Buf, unsigned Length,
77 const std::string &ModuleID);
79 /// The ParseBytecode method lazily parses functions. Use this
80 /// method to cause the parser to actually parse all the function bodies
81 /// in the bytecode buffer.
82 /// @see ParseBytecode
83 /// @brief Parse all function bodies
84 void ParseAllFunctionBodies ();
86 /// The ParseBytecode method lazily parses functions. Use this
87 /// method to cause the parser to parse the next function of a given
88 /// type. Note that this will remove the function from what is to be
89 /// included by ParseAllFunctionBodies.
90 /// @see ParseAllFunctionBodies
91 /// @see ParseBytecode
92 /// @brief Parse the next function of a specific type
93 void ParseNextFunction (Function* Func) ;
96 /// @name Parsing Units For Subclasses
99 /// @brief Parse whole module scope
102 /// @brief Parse the version information block
103 void ParseVersionInfo ();
105 /// @brief Parse the ModuleGlobalInfo block
106 void ParseModuleGlobalInfo ();
108 /// @brief Parse a symbol table
109 void ParseSymbolTable ();
111 /// This function parses LLVM functions lazily. It obtains the type of the
112 /// function and records where the body of the function is in the bytecode
113 /// buffer. The caller can then use the ParseNextFunction and
114 /// ParseAllFunctionBodies to get handler events for the functions.
115 /// @brief Parse functions lazily.
116 void ParseFunctionLazily ();
118 /// @brief Parse a function body
119 void ParseFunctionBody (Function* Func);
121 /// @brief Parse a compaction table
122 void ParseCompactionTable ();
124 /// @brief Parse global types
125 void ParseGlobalTypes ();
127 /// @brief Parse a basic block (for LLVM 1.0 basic block blocks)
128 void ParseBasicBlock (unsigned BlockNo);
130 /// @brief Parse an instruction list (for post LLVM 1.0 instruction lists
131 /// with blocks differentiated by terminating instructions).
/// NOTE(review): the meaning of the unsigned result is not documented here.
132 unsigned ParseInstructionList();
134 /// @brief Parse an instruction.
/// NOTE(review): the meaning of the bool result is not documented here;
/// confirm whether it mirrors BytecodeHandler::handleInstruction.
135 bool ParseInstruction (std::vector<unsigned>& Args);
137 /// @brief Parse a constant pool
138 void ParseConstantPool (TypeListTy& List);
140 /// @brief Parse a constant value
141 void ParseConstantValue (unsigned TypeID);
143 /// @brief Parse a block of types.
144 void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries);
146 /// @brief Parse a single type.
147 const Type *ParseTypeConstant();
149 /// @brief Parse a string constants block
150 void ParseStringConstants (unsigned NumEntries);
156 BufPtr MemStart; ///< Start of the memory buffer
157 BufPtr MemEnd; ///< End of the memory buffer
158 BufPtr BlockStart; ///< Start of current block being parsed
159 BufPtr BlockEnd; ///< End of current block being parsed
160 BufPtr At; ///< Where we're currently parsing at
162 bool reportAlignment; ///< Parser should report alignment?
163 bool reportBlocks; ///< Parser should report blocks?
164 bool reportVBR; ///< Report VBR compression events
166 // Information about the module, extracted from the bytecode revision number.
167 unsigned char RevisionNum; // The rev # itself
169 // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
171 // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
172 // block. This was fixed to be like all other blocks in 1.2.
173 bool hasInconsistentModuleGlobalInfo;
175 // Revision #0 also explicitly encoded zero values for primitive types like
177 bool hasExplicitPrimitiveZeros;
179 // Flags to control features specific to LLVM 1.2 and before (revision #1)
181 // LLVM 1.2 and earlier required that getelementptr structure indices were
182 // ubyte constants and that sequential type indices were longs.
183 bool hasRestrictedGEPTypes;
186 /// CompactionTypeTable - If a compaction table is active in the current function,
187 /// this is the mapping that it contains.
188 std::vector<Type*> CompactionTypeTable;
190 // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
191 // forward references to constants. Such values may be referenced before they
192 // are defined, and if so, the temporary object that they represent is held
195 typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
196 ConstantRefsType ConstantFwdRefs;
198 // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
199 // to deal with forward references to types.
// NOTE(review): no member named TypesLoaded is visible here; the declarations
// that follow this comment are ModuleTypes and FunctionTypes.
201 TypeListTy ModuleTypes;
202 TypeListTy FunctionTypes;
204 // When the ModuleGlobalInfo section is read, we create a FunctionType object
205 // for each function in the module. When the function is loaded, this type is
206 // used to instantiate the actual function object.
// NOTE(review): the comment above speaks of FunctionType objects, but the
// container declared below holds Function* -- confirm which is intended.
208 std::vector<Function*> FunctionSignatureList;
210 // Constant values are read in after global variables. Because of this, we
211 // must defer setting the initializers on global variables until after module
212 // level constants have been read. In the mean time, this list keeps track of
215 std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
218 /// @name Implementation Details
221 /// This stores the parser's handler. It makes virtual function calls through
222 /// the BytecodeHandler to notify the handler of parsing events. What the
223 /// handler does with the events is completely orthogonal to the business of
224 /// parsing the bytecode.
225 /// @brief The handler of bytecode parsing events.
226 BytecodeHandler* handler;
228 /// For lazy reading-in of functions, we need to save away several pieces of
229 /// information about each function: its begin and end pointer in the buffer
230 /// and its FunctionSlot.
/// NOTE(review): as declared here the struct stores only the two buffer
/// pointers; no FunctionSlot member is visible.
231 struct LazyFunctionInfo {
232 const unsigned char *Buf, *EndBuf; ///< Begin and end of the function in the buffer
/// Both pointers default to 0 (null).
233 LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0)
234 : Buf(B), EndBuf(EB) {}
236 typedef std::map<Function*, LazyFunctionInfo> LazyFunctionMap; ///< Keyed by function
237 LazyFunctionMap LazyFunctionLoadMap; ///< Lazy-read info for each function
241 /// Is there more to parse in the current block?
242 inline bool moreInBlock();
244 /// Have we read past the end of the block?
/// NOTE(review): block_name presumably identifies the block in the error
/// report -- the definition is not visible here; confirm.
245 inline void checkPastBlockEnd(const char * block_name);
/// NOTE(review): presumably aligns the read position to a 32-bit boundary --
/// confirm against the definition.
248 inline void align32();
/// Primitive readers. Only the declarations are visible here; judging by the
/// names they read fixed-width and VBR-encoded integers, a string, and raw
/// bytes -- confirm against the definitions.
251 inline unsigned read_uint();
252 inline unsigned read_vbr_uint();
253 inline uint64_t read_vbr_uint64();
254 inline int64_t read_vbr_int64();
255 inline std::string read_str();
256 inline void read_data(void *Ptr, void *End);
258 /// Read a block header
/// Type and Size are non-const references, presumably outputs -- confirm.
259 inline void readBlock(unsigned &Type, unsigned &Size);
/// @brief Resolve a type ID to its Type (declaration only; defined elsewhere).
/// Declared without a class-name qualifier: a qualified name is not permitted
/// on a member declaration inside its own class.
261 const Type *getType(unsigned ID);
262 /// getGlobalTableType - This is just like getType, but when a compaction
263 /// table is in use, it is ignored. Also, no forward references or other
264 /// fancy features are supported.
/// @param Slot Type slot; values below Type::FirstDerivedTyID are treated as
/// primitive type IDs, larger values index ModuleTypes after rebasing.
/// @throws std::string if the rebased slot is not within ModuleTypes.
265 const Type *getGlobalTableType(unsigned Slot) {
266 if (Slot < Type::FirstDerivedTyID) { // primitive type range
267 const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot);
268 assert(Ty && "Not a primitive type ID?");
// NOTE(review): the remainder of this branch is not visible in this listing.
271 Slot -= Type::FirstDerivedTyID; // rebase so Slot indexes ModuleTypes
272 if (Slot >= ModuleTypes.size())
273 throw std::string("Illegal compaction table type reference!");
274 return ModuleTypes[Slot];
/// getGlobalTableTypeSlot - Return the slot number for a type. Primitive
/// types map to their type ID; any other type is looked up by a linear search
/// of ModuleTypes and its index is offset by Type::FirstDerivedTyID.
/// @throws std::string if Ty is not a primitive type and is not in ModuleTypes.
277 unsigned getGlobalTableTypeSlot(const Type *Ty) {
278 if (Ty->isPrimitiveType())
279 return Ty->getTypeID();
280 TypeListTy::iterator I = find(ModuleTypes.begin(),
281 ModuleTypes.end(), Ty);
282 if (I == ModuleTypes.end())
283 throw std::string("Didn't find type in ModuleTypes.");
// Index of the match, computed as the element's distance from the first element.
284 return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]);
/// Copy construction and assignment are declared but, per the markers below,
/// are not meant to be defined.
287 AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
288 void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
293 /// This class provides the interface for handling bytecode events during
294 /// parsing. The methods on this interface are invoked by the
295 /// AbstractBytecodeParser as it discovers the content of a bytecode stream.
296 /// This class provides a clear separation of concerns between recognizing
297 /// the semantic units of a bytecode file and deciding what to do with them.
298 /// The AbstractBytecodeParser recognizes the content of the bytecode file and
299 /// calls the BytecodeHandler methods to determine what should be done. This
300 /// arrangement allows Bytecode files to be read and handled for a number of
301 /// purposes simply by creating a subclass of BytecodeHandler. None of the
302 /// parsing details need to be understood, only the meaning of the calls
303 /// made on this interface.
305 /// Another paradigm that uses this design pattern is the XML SAX Parser. The
306 /// ContentHandler for SAX plays the same role as the BytecodeHandler here.
307 /// @see AbstractBytecodeParser
308 /// @brief Handle Bytecode Parsing Events
309 class BytecodeHandler {
311 /// @name Constructors And Operators
314 /// @brief Default constructor (empty)
316 /// @brief Virtual destructor (empty)
317 virtual ~BytecodeHandler() {}
/// Copy construction and assignment are declared but, per the markers below,
/// are not meant to be defined.
320 BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT
321 void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT
324 /// @name Handler Methods
328 /// This method is called whenever the parser detects an error in the
329 /// bytecode formatting. Returning true will cause the parser to keep
330 /// going, however this is inadvisable in most cases. Returning false will
331 /// cause the parser to throw the message as a std::string.
/// @param str The error message
/// @returns true to continue parsing, false to have the parser throw
332 /// @brief Handle parsing errors.
333 virtual bool handleError(const std::string& str );
335 /// This method is called at the beginning of a parse before anything is
336 /// read in order to give the handler a chance to initialize.
337 /// @brief Handle the start of a bytecode parse
338 virtual void handleStart();
340 /// This method is called at the end of a parse after everything has been
341 /// read in order to give the handler a chance to terminate.
342 /// @brief Handle the end of a bytecode parse
343 virtual void handleFinish();
345 /// This method is called at the start of a module to indicate that a
346 /// module is being parsed.
/// @param id Presumably the module identifier given to the parser -- confirm
347 /// @brief Handle the start of a module.
348 virtual void handleModuleBegin(const std::string& id);
350 /// This method is called at the end of a module to indicate that the module
351 /// previously being parsed has concluded.
/// @param id Presumably the module identifier given to the parser -- confirm
352 /// @brief Handle the end of a module.
353 virtual void handleModuleEnd(const std::string& id);
355 /// This method is called once the version information has been parsed. It
356 /// provides the information about the version of the bytecode file being
358 /// @brief Handle the bytecode prolog
359 virtual void handleVersionInfo(
360 unsigned char RevisionNum, ///< Byte code revision number
361 Module::Endianness Endianness, ///< Endianness indicator
362 Module::PointerSize PointerSize ///< PointerSize indicator
365 /// This method is called at the start of a module globals block which
366 /// contains the global variables and the function placeholders
/// @brief Handle start of module globals block.
367 virtual void handleModuleGlobalsBegin();
369 /// This method is called when a non-initialized global variable is
370 /// recognized. Its type, constness, and linkage type are provided.
371 /// @brief Handle a non-initialized global variable
372 virtual void handleGlobalVariable(
373 const Type* ElemType, ///< The type of the global variable
374 bool isConstant, ///< Whether the GV is constant or not
375 GlobalValue::LinkageTypes ///< The linkage type of the GV
378 /// This method is called when an initialized global variable is recognized.
379 /// Its type, constness, linkage type, and the slot number of the initializer
381 /// @brief Handle an initialized global variable.
382 virtual void handleInitializedGV(
383 const Type* ElemType, ///< The type of the global variable
384 bool isConstant, ///< Whether the GV is constant or not
385 GlobalValue::LinkageTypes,///< The linkage type of the GV
386 unsigned initSlot ///< Slot number of GV's initializer
389 /// This method is called when a new type is recognized. The type is
390 /// converted from the bytecode and passed to this method.
391 /// @brief Handle a type
392 virtual void handleType( const Type* Ty );
394 /// This method is called when the function prototype for a function is
395 /// encountered in the module globals block.
/// @brief Handle a function declaration.
396 virtual void handleFunctionDeclaration(
398 const FunctionType* FuncType ///< The type of the function
401 /// This method is called at the end of the module globals block.
402 /// @brief Handle end of module globals block.
403 virtual void handleModuleGlobalsEnd();
405 /// This method is called at the beginning of a compaction table.
406 /// @brief Handle start of compaction table.
407 virtual void handleCompactionTableBegin();
// NOTE(review): the parameter lists of the plane/type/value handlers below
// are not visible in this listing.
409 /// @brief Handle start of a compaction table plane
410 virtual void handleCompactionTablePlane(
416 /// @brief Handle a type entry in the compaction table
417 virtual void handleCompactionTableType(
423 /// @brief Handle a value entry in the compaction table
424 virtual void handleCompactionTableValue(
430 /// @brief Handle end of a compaction table
431 virtual void handleCompactionTableEnd();
433 /// @brief Handle start of a symbol table
434 virtual void handleSymbolTableBegin();
436 /// @brief Handle start of a symbol table plane
437 virtual void handleSymbolTablePlane(
443 /// @brief Handle a named type in the symbol table
444 virtual void handleSymbolTableType(
447 const std::string& name ///< The name of the type
450 /// @brief Handle a named value in the symbol table
451 virtual void handleSymbolTableValue(
454 const std::string& name ///< The name of the value
457 /// @brief Handle the end of a symbol table
458 virtual void handleSymbolTableEnd();
460 /// @brief Handle the beginning of a function body
/// Func is the function whose body begins; Size is presumably the size of
/// that body -- units are not stated here, confirm.
461 virtual void handleFunctionBegin(
462 Function* Func, unsigned Size
465 /// @brief Handle the end of a function body
466 virtual void handleFunctionEnd(
470 /// @brief Handle the beginning of a basic block
471 virtual void handleBasicBlockBegin(
475 /// This method is called for each instruction that is parsed.
476 /// @returns true if the instruction is a block terminating instruction
477 /// @brief Handle an instruction
478 virtual bool handleInstruction(
481 std::vector<unsigned>& Operands, ///< Passed by non-const reference
485 /// @brief Handle the end of a basic block
486 virtual void handleBasicBlockEnd(unsigned blocknum);
488 /// @brief Handle start of global constants block.
489 virtual void handleGlobalConstantsBegin();
491 /// @brief Handle a constant expression
492 virtual void handleConstantExpression(
/// NOTE(review): ArgVec is taken by value (copied per call), unlike the
/// by-reference vectors of the array/struct handlers -- confirm intent.
495 std::vector<std::pair<const Type*,unsigned> > ArgVec
498 /// @brief Handle a constant array
499 virtual void handleConstantArray(
501 std::vector<unsigned>& ElementSlots
504 /// @brief Handle a constant structure
505 virtual void handleConstantStruct(
506 const StructType* ST,
507 std::vector<unsigned>& ElementSlots
510 /// @brief Handle a constant pointer
511 virtual void handleConstantPointer(
512 const PointerType* PT,
516 /// @brief Handle a constant string (array special case)
517 virtual void handleConstantString(
518 const ConstantArray* CA
521 /// @brief Handle a primitive constant value
522 virtual void handleConstantValue( Constant * c );
524 /// @brief Handle the end of the global constants
525 virtual void handleGlobalConstantsEnd();
527 /// @brief Handle an alignment event
528 virtual void handleAlignment(unsigned numBytes);
/// @brief Handle a block event
530 virtual void handleBlock(
531 unsigned BType, ///< The type of block
532 const unsigned char* StartPtr, ///< The start of the block
533 unsigned Size ///< The size of the block
/// NOTE(review): undocumented in the original; presumably these report VBR
/// compression events for 32-bit and 64-bit values -- confirm meaning of Size.
535 virtual void handleVBR32(unsigned Size );
536 virtual void handleVBR64(unsigned Size );
541 } // End llvm namespace