Add boolean file format flags in preparation for version 5 bytecode.

[oota-llvm.git] / lib / Bytecode / Reader / Reader.h
diff --git a/lib/Bytecode/Reader/Reader.h b/lib/Bytecode/Reader/Reader.h

index 89ab0a59b9411c51e4d4ba4626023e5d1d2e571f..80dbea986dd0c45cc854c871b6d852393108a983 100644 (file)
--- a/lib/Bytecode/Reader/Reader.h
+++ b/lib/Bytecode/Reader/Reader.h
@@ -56,6 +56,7 @@ public:
  /// @name Types
  /// @{
  public:
+
    /// @brief A convenience type for the buffer pointer
    typedef const unsigned char* BufPtr;
  
@@ -72,7 +73,7 @@ public:
    /// globals section.
    /// @brief A list of values as a User of those Values.
    struct ValueList : public User {
-    ValueList() : User(Type::VoidTy, Value::FunctionVal) {}
+    ValueList() : User(Type::VoidTy, Value::ValueListVal) {}
  
      // vector compatibility methods
      unsigned size() const { return getNumOperands(); }
@@ -129,8 +130,7 @@ public:
    void ParseBytecode(
       const unsigned char *Buf,    ///< Beginning of the bytecode buffer
       unsigned Length,             ///< Length of the bytecode buffer
-     const std::string &ModuleID, ///< An identifier for the module constructed.
-     bool processFunctions=false  ///< Process all function bodies fully.
+     const std::string &ModuleID  ///< An identifier for the module constructed.
    );
  
    /// @brief Parse all function bodies
@@ -192,7 +192,7 @@ protected:
    void ParseFunctionBody(Function* Func);
  
    /// @brief Parse the type list portion of a compaction table
-  void BytecodeReader::ParseCompactionTypes( unsigned NumEntries );
+  void ParseCompactionTypes(unsigned NumEntries);
  
    /// @brief Parse a compaction table
    void ParseCompactionTable();
@@ -242,12 +242,13 @@ private:
    BufPtr At;           ///< Where we're currently parsing at
  
    /// Information about the module, extracted from the bytecode revision number.
+  ///
    unsigned char RevisionNum;        // The rev # itself
  
    /// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
  
-  /// Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
-  /// block.  This was fixed to be like all other blocks in 1.2
+  /// Revision #0 had an explicit alignment of data only for the
+  /// ModuleGlobalInfo block.  This was fixed to be like all other blocks in 1.2
    bool hasInconsistentModuleGlobalInfo;
  
    /// Revision #0 also explicitly encoded zero values for primitive types like
@@ -268,9 +269,54 @@ private:
    /// from Value style of bytecode file is being read.
    bool hasTypeDerivedFromValue;
  
-  /// CompactionTable - If a compaction table is active in the current function,
-  /// this is the mapping that it contains.
-  std::vector<const Type*> CompactionTypes;
+  /// LLVM 1.2 and earlier encoded block headers as two uint (8 bytes), one for
+  /// the size and one for the type. This is a bit wasteful, especially for
+  /// small files where the 8 bytes per block is a large fraction of the total
+  /// block size. In LLVM 1.3, the block type and length are encoded into a
+  /// single uint32 by restricting the number of block types (limit 31) and the
+  /// maximum size of a block (limit 2^27-1=134,217,727). Note that the module
+  /// block still uses the 8-byte format so the maximum size of a file can be
+  /// 2^32-1 bytes long.
+  bool hasLongBlockHeaders;
+
+  /// LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
+  /// this has been reduced to vbr_uint24. It shouldn't make much difference 
+  /// since we haven't run into a module with > 16 million types, but for safety
+  /// the 24-bit restriction has been enforced in 1.3 to free some bits in
+  /// various places and to ensure consistency. In particular, global vars are
+  /// restricted to 24-bits.
+  bool has32BitTypes;
+
+  /// LLVM 1.2 and earlier did not provide a target triple nor a list of 
+  /// libraries on which the bytecode is dependent. LLVM 1.3 provides these
+  /// features, for use in future versions of LLVM.
+  bool hasNoDependentLibraries;
+
+  /// LLVM 1.3 and earlier caused blocks and other fields to start on 32-bit
+  /// aligned boundaries. This can lead to as much as 30% bytecode size overhead
+  /// in various corner cases (lots of long instructions). In LLVM 1.4,
+  /// alignment of bytecode fields was done away with completely.
+  bool hasAlignment;
+
+  // In version 4, basic blocks have a minimum index of 0 whereas all the
+  // other primitives have a minimum index of 1 (because 0 is the "null"
+  // value). In version 5, we made this consistent.
+  bool hasInconsistentBBSlotNums;
+
+  // In version 4, the types SByte and UByte were encoded as vbr_uint so that
+  // signed values > 63 and unsigned values >127 would be encoded as two
+  // bytes. In version 5, they are encoded directly in a single byte.
+  bool hasVBRByteTypes;
+
+  // In version 4, modules begin with a "Module Block" which encodes a 4-byte
+  // integer value 0x01 to identify the module block. This is unnecessary and
+  // removed in version 5.
+  bool hasUnnecessaryModuleBlockId;
+
+  /// CompactionTypes - If a compaction table is active in the current function,
+  /// this is the mapping that it contains.  We keep track of what resolved type
+  /// it is as well as what global type entry it is.
+  std::vector<std::pair<const Type*, unsigned> > CompactionTypes;
  
    /// @brief If a compaction table is active in the current function,
    /// this is the mapping that it contains.
@@ -366,8 +412,9 @@ private:
    /// @brief Get a value from its typeid and slot number
    Value* getValue(unsigned TypeID, unsigned num, bool Create = true);
  
-  /// @brief Get a value from its type and slot number, ignoring compaction tables.
-  Value *getGlobalTableValue(const Type *Ty, unsigned SlotNo);
+  /// @brief Get a value from its type and slot number, ignoring compaction
+  /// tables.
+  Value *getGlobalTableValue(unsigned TyID, unsigned SlotNo);
  
    /// @brief Get a basic block for current function
    BasicBlock *getBasicBlock(unsigned ID);
@@ -430,6 +477,10 @@ private:
    /// @brief Read an unsigned integer with variable bit rate encoding
    inline unsigned read_vbr_uint();
  
+  /// @brief Read an unsigned integer of no more than 24-bits with variable
+  /// bit rate encoding.
+  inline unsigned read_vbr_uint24();
+
    /// @brief Read an unsigned 64-bit integer with variable bit rate encoding.
    inline uint64_t read_vbr_uint64();
  
@@ -461,7 +512,8 @@ private:
  
  /// @brief A function for creating a BytecodeAnalzer as a handler
  /// for the Bytecode reader.
-BytecodeHandler* createBytecodeAnalyzerHandler(BytecodeAnalysis& bca );
+BytecodeHandler* createBytecodeAnalyzerHandler(BytecodeAnalysis& bca, 
+                                               std::ostream* output );
  
  
  } // End llvm namespace