//===-- Reader.h - Interface To Bytecode Reading ----------------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Reid Spencer and is distributed under the
+// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This header file defines the interface to the Bytecode Reader which is
+// This header file defines the interface to the Bytecode Reader which is
// responsible for correctly interpreting bytecode files (backwards compatible)
// and materializing a module from the bytecode read.
//
/// This class defines the interface for parsing a buffer of bytecode. The
/// parser itself takes no action except to call the various functions of
/// the handler interface. The parser's sole responsibility is the correct
-/// interpretation of the bytecode buffer. The handler is responsible for
-/// instantiating and keeping track of all values. As a convenience, the parser
+/// interpretation of the bytecode buffer. The handler is responsible for
+/// instantiating and keeping track of all values. As a convenience, the parser
/// is responsible for materializing types and will pass them through the
/// handler interface as necessary.
/// @see BytecodeHandler
/// @{
public:
/// @brief Default constructor. By default, no handler is used.
- BytecodeReader(
- BytecodeHandler* h = 0
- ) {
- Handler = h;
+ BytecodeReader(BytecodeHandler* h = 0) {
+ decompressedBlock = 0;
+ Handler = h;
}
- ~BytecodeReader() { freeState(); }
+ ~BytecodeReader() {
+ freeState();
+ if (decompressedBlock) {
+ ::free(decompressedBlock);
+ decompressedBlock = 0;
+ }
+ }
/// @}
/// @name Types
/// @{
public:
+
/// @brief A convenience type for the buffer pointer
typedef const unsigned char* BufPtr;
/// constants with global variables at the end of reading the
/// globals section.
/// @brief A list of values as a User of those Values.
- struct ValueList : public User {
- ValueList() : User(Type::VoidTy, Value::FunctionVal) {}
+ class ValueList : public User {
+ std::vector<Use> Uses;
+ public:
+ ValueList() : User(Type::VoidTy, Value::ArgumentVal, 0, 0) {}
// vector compatibility methods
unsigned size() const { return getNumOperands(); }
- void push_back(Value *V) { Operands.push_back(Use(V, this)); }
- Value *back() const { return Operands.back(); }
- void pop_back() { Operands.pop_back(); }
- bool empty() const { return Operands.empty(); }
- // must override this
+ void push_back(Value *V) {
+ Uses.push_back(Use(V, this));
+ OperandList = &Uses[0];
+ ++NumOperands;
+ }
+ Value *back() const { return Uses.back(); }
+ void pop_back() { Uses.pop_back(); --NumOperands; }
+ bool empty() const { return NumOperands == 0; }
virtual void print(std::ostream& os) const {
- for ( unsigned i = 0; i < size(); i++ ) {
+ for (unsigned i = 0; i < size(); ++i) {
os << i << " ";
getOperand(i)->print(os);
os << "\n";
/// @brief A 2 dimensional table of values
typedef std::vector<ValueList*> ValueTable;
- /// This map is needed so that forward references to constants can be looked
+ /// This map is needed so that forward references to constants can be looked
/// up by Type and slot number when resolving those references.
/// @brief A mapping of a Type/slot pair to a Constant*.
- typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
+ typedef std::map<std::pair<unsigned,unsigned>, Constant*> ConstantRefsType;
/// For lazy read-in of functions, we need to save the location in the
/// data stream where the function is located. This structure provides that
/// information. Lazy read-in is used mostly by the JIT which only wants to
- /// resolve functions as it needs them.
+ /// resolve functions as it needs them.
/// @brief Keeps pointers to function contents for later use.
struct LazyFunctionInfo {
const unsigned char *Buf, *EndBuf;
void ParseBytecode(
const unsigned char *Buf, ///< Beginning of the bytecode buffer
unsigned Length, ///< Length of the bytecode buffer
- const std::string &ModuleID, ///< An identifier for the module constructed.
- bool processFunctions=false ///< Process all function bodies fully.
+ const std::string &ModuleID ///< An identifier for the module constructed.
);
/// @brief Parse all function bodies
}
/// This method is abstract in the parent ModuleProvider class. Its
- /// implementation is identical to ParseAllFunctionBodies.
+ /// implementation is identical to ParseAllFunctionBodies.
/// @see ParseAllFunctionBodies
/// @brief Make the whole module materialize
virtual Module* materializeModule() {
void ParseFunctionBody(Function* Func);
/// @brief Parse the type list portion of a compaction table
- void BytecodeReader::ParseCompactionTypes( unsigned NumEntries );
+ void ParseCompactionTypes(unsigned NumEntries);
/// @brief Parse a compaction table
void ParseCompactionTable();
unsigned ParseInstructionList(
Function* F ///< The function into which BBs will be inserted
);
-
+
/// @brief Parse a single instruction.
void ParseInstruction(
std::vector<unsigned>& Args, ///< The arguments to be filled in
);
/// @brief Parse the whole constant pool
- void ParseConstantPool(ValueTable& Values, TypeListTy& Types,
+ void ParseConstantPool(ValueTable& Values, TypeListTy& Types,
bool isFunction);
/// @brief Parse a single constant value
Constant* ParseConstantValue(unsigned TypeID);
/// @brief Parse a block of types constants
- void ParseTypeConstants(TypeListTy &Tab, unsigned NumEntries);
+ void ParseTypes(TypeListTy &Tab, unsigned NumEntries);
/// @brief Parse a single type constant
- const Type *ParseTypeConstant();
+ const Type *ParseType();
/// @brief Parse a string constants block
void ParseStringConstants(unsigned NumEntries, ValueTable &Tab);
/// @name Data
/// @{
private:
+ char* decompressedBlock; ///< Result of decompression
BufPtr MemStart; ///< Start of the memory buffer
BufPtr MemEnd; ///< End of the memory buffer
BufPtr BlockStart; ///< Start of current block being parsed
BufPtr At; ///< Where we're currently parsing at
/// Information about the module, extracted from the bytecode revision number.
+ ///
unsigned char RevisionNum; // The rev # itself
/// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
- /// Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
- /// block. This was fixed to be like all other blocks in 1.2
+ /// Revision #0 had an explicit alignment of data only for the
+ /// ModuleGlobalInfo block. This was fixed to be like all other blocks in 1.2
bool hasInconsistentModuleGlobalInfo;
/// Revision #0 also explicitly encoded zero values for primitive types like
/// from Value style of bytecode file is being read.
bool hasTypeDerivedFromValue;
- /// CompactionTable - If a compaction table is active in the current function,
- /// this is the mapping that it contains.
- std::vector<const Type*> CompactionTypes;
+ /// LLVM 1.2 and earlier encoded block headers as two uint (8 bytes), one for
+ /// the size and one for the type. This is a bit wasteful, especially for
+ /// small files where the 8 bytes per block is a large fraction of the total
+ /// block size. In LLVM 1.3, the block type and length are encoded into a
+ /// single uint32 by restricting the number of block types (limit 31) and the
+ /// maximum size of a block (limit 2^27-1=134,217,727). Note that the module
+ /// block still uses the 8-byte format so the maximum size of a file can be
+ /// 2^32-1 bytes long.
+ bool hasLongBlockHeaders;
+
+ /// LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
+ /// this has been reduced to vbr_uint24. It shouldn't make much difference
+ /// since we haven't run into a module with > 16 million types, but for safety
+ /// the 24-bit restriction has been enforced in 1.3 to free some bits in
+ /// various places and to ensure consistency. In particular, global vars are
+ /// restricted to 24-bits.
+ bool has32BitTypes;
+
+ /// LLVM 1.2 and earlier did not provide a target triple nor a list of
+ /// libraries on which the bytecode is dependent. LLVM 1.3 provides these
+ /// features, for use in future versions of LLVM.
+ bool hasNoDependentLibraries;
+
+ /// LLVM 1.3 and earlier caused blocks and other fields to start on 32-bit
+ /// aligned boundaries. This can lead to as much as 30% bytecode size overhead
+ /// in various corner cases (lots of long instructions). In LLVM 1.4,
+ /// alignment of bytecode fields was done away with completely.
+ bool hasAlignment;
+
+ /// In version 4 and earlier, the bytecode format did not support the 'undef'
+ /// constant.
+ bool hasNoUndefValue;
+
+ /// In version 4 and earlier, the bytecode format did not save space for flags
+ /// in the global info block for functions.
+ bool hasNoFlagsForFunctions;
+
+ /// In version 4 and earlier, there was no opcode space reserved for the
+ /// unreachable instruction.
+ bool hasNoUnreachableInst;
+
+ /// CompactionTypes - If a compaction table is active in the current function,
+ /// this is the mapping that it contains. We keep track of what resolved type
+ /// it is as well as what global type entry it is.
+ std::vector<std::pair<const Type*, unsigned> > CompactionTypes;
/// @brief If a compaction table is active in the current function,
/// this is the mapping that it contains.
/// @brief The basic blocks we've parsed, while parsing a function.
std::vector<BasicBlock*> ParsedBasicBlocks;
- /// This maintains a mapping between <Type, Slot #>'s and
- /// forward references to constants. Such values may be referenced before they
- /// are defined, and if so, the temporary object that they represent is held
- /// here.
- /// @brief Temporary place for forward references to constants.
+ /// This maintains a mapping between <Type, Slot #>'s and forward references
+ /// to constants. Such values may be referenced before they are defined, and
+ /// if so, the temporary object that they represent is held here.
+ /// @brief Temporary place for forward references to constants.
ConstantRefsType ConstantFwdRefs;
/// Constant values are read in after global variables. Because of this, we
/// must defer setting the initializers on global variables until after module
- /// level constants have been read. In the mean time, this list keeps track of
- /// what we must do.
+ /// level constants have been read. In the mean time, this list keeps track
+ /// of what we must do.
GlobalInitsList GlobalInits;
// For lazy reading-in of functions, we need to save away several pieces of
// and its FunctionSlot.
LazyFunctionMap LazyFunctionLoadMap;
- /// This stores the parser's handler which is used for handling tasks other
- /// just than reading bytecode into the IR. If this is non-null, calls on
- /// the (polymorphic) BytecodeHandler interface (see llvm/Bytecode/Handler.h)
- /// will be made to report the logical structure of the bytecode file. What
- /// the handler does with the events it receives is completely orthogonal to
+ /// This stores the parser's handler which is used for handling tasks other
+ /// than just reading bytecode into the IR. If this is non-null, calls on
+ /// the (polymorphic) BytecodeHandler interface (see llvm/Bytecode/Handler.h)
+ /// will be made to report the logical structure of the bytecode file. What
+ /// the handler does with the events it receives is completely orthogonal to
/// the business of parsing the bytecode and building the IR. This is used,
/// for example, by the llvm-abcd tool for analysis of byte code.
/// @brief Handler for parsing events.
inline const Type* getSanitizedType(unsigned& ID );
/// @brief Read in and get a sanitized type id
- inline const Type* BytecodeReader::readSanitizedType();
+ inline const Type* readSanitizedType();
/// @brief Converts a Type* to its type slot number
unsigned getTypeSlot(const Type *Ty);
const Type *getGlobalTableType(unsigned TypeId);
/// This is just like getTypeSlot, but when a compaction table is in use,
- /// it is ignored.
+ /// it is ignored.
unsigned getGlobalTableTypeSlot(const Type *Ty);
-
+
/// @brief Get a value from its typeid and slot number
Value* getValue(unsigned TypeID, unsigned num, bool Create = true);
- /// @brief Get a value from its type and slot number, ignoring compaction tables.
- Value *getGlobalTableValue(const Type *Ty, unsigned SlotNo);
+ /// @brief Get a value from its type ID and slot number, ignoring compaction
+ /// tables.
+ Value *getGlobalTableValue(unsigned TyID, unsigned SlotNo);
/// @brief Get a basic block for current function
BasicBlock *getBasicBlock(unsigned ID);
/// @brief Insert the arguments of a function.
void insertArguments(Function* F );
- /// @brief Resolve all references to the placeholder (if any) for the
+ /// @brief Resolve all references to the placeholder (if any) for the
/// given constant.
- void ResolveReferencesToConstant(Constant *C, unsigned Slot);
+ void ResolveReferencesToConstant(Constant *C, unsigned Typ, unsigned Slot);
/// @brief Release our memory.
void freeState() {
/// @brief Read an unsigned integer with variable bit rate encoding
inline unsigned read_vbr_uint();
+ /// @brief Read an unsigned integer of no more than 24-bits with variable
+ /// bit rate encoding.
+ inline unsigned read_vbr_uint24();
+
/// @brief Read an unsigned 64-bit integer with variable bit rate encoding.
inline uint64_t read_vbr_uint64();
/// @brief Read a string
inline std::string read_str();
+ /// @brief Read a float value
+ inline void read_float(float& FloatVal);
+
+ /// @brief Read a double value
+ inline void read_double(double& DoubleVal);
+
/// @brief Read an arbitrary data chunk of fixed length
inline void read_data(void *Ptr, void *End);
/// @brief A function for creating a BytecodeAnalyzer as a handler
/// for the Bytecode reader.
-BytecodeHandler* createBytecodeAnalyzerHandler(BytecodeAnalysis& bca );
+BytecodeHandler* createBytecodeAnalyzerHandler(BytecodeAnalysis& bca,
+ std::ostream* output );
} // End llvm namespace