lib/CodeGen/AsmPrinter/DwarfAccelTable.h

   1 //==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file contains support for writing dwarf accelerator tables.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
  15 #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
  16
  17 #include "llvm/ADT/StringMap.h"
  18 #include "llvm/MC/MCSymbol.h"
  19 #include "llvm/Support/Dwarf.h"
  20 #include "llvm/Support/DataTypes.h"
  21 #include "llvm/Support/Debug.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include "llvm/Support/Format.h"
  24 #include "llvm/Support/FormattedStream.h"
  25 #include <vector>
  26 #include <map>
  27
  28 // The apple dwarf accelerator tables are an indirect hash table optimized
  29 // for null lookup rather than access to known data. They are output into
  30 // an on-disk format that looks like this:
  31 //
  32 // .-------------.
  33 // |  HEADER     |
  34 // |-------------|
  35 // |  BUCKETS    |
  36 // |-------------|
  37 // |  HASHES     |
  38 // |-------------|
  39 // |  OFFSETS    |
  40 // |-------------|
  41 // |  DATA       |
  42 // `-------------'
  43 //
  44 // where the header contains a magic number, version, type of hash function,
  45 // the number of buckets, total number of hashes, and room for a special
  46 // struct of data and the length of that struct.
  47 //
  48 // The buckets contain an index (e.g. 6) into the hashes array. The hashes
  49 // section contains all of the 32-bit hash values in contiguous memory, and
  50 // the offsets contain the offset into the data area for the particular
  51 // hash.
  52 //
  53 // For a lookup example, we could hash a function name and take it modulo the
  54 // number of buckets giving us our bucket. From there we take the bucket value
  55 // as an index into the hashes table and look at each successive hash as long
  56 // as the hash value is still the same modulo result (bucket value) as earlier.
  57 // If we have a match we look at that same entry in the offsets table and
  58 // grab the offset in the data for our final match.
  59
  60 namespace llvm {
  61
  62 class AsmPrinter;
  63 class DIE;
  64 class DwarfDebug;
  65
  66 class DwarfAccelTable {
  67
  68   enum HashFunctionType {
  69     eHashFunctionDJB = 0u
  70   };
  71
  72   static uint32_t HashDJB (const char *s) {
  73     uint32_t h = 5381;
  74     for (unsigned char c = *s; c; c = *++s)
  75       h = ((h << 5) + h) + c;
  76     return h;
  77   }
  78
  79   // Helper function to compute the number of buckets needed based on
  80   // the number of unique hashes.
  81   void ComputeBucketCount (void);
  82
  83   struct TableHeader {
  84     uint32_t   magic;           // 'HASH' magic value to allow endian detection
  85     uint16_t   version;         // Version number.
  86     uint16_t   hash_function;   // The hash function enumeration that was used.
  87     uint32_t   bucket_count;    // The number of buckets in this hash table.
  88     uint32_t   hashes_count;    // The total number of unique hash values
  89                                 // and hash data offsets in this table.
  90     uint32_t   header_data_len; // The bytes to skip to get to the hash
  91                                 // indexes (buckets) for correct alignment.
  92     // Also written to disk is the implementation specific header data.
  93
  94     static const uint32_t MagicHash = 0x48415348;
  95
  96     TableHeader (uint32_t data_len) :
  97       magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
  98       bucket_count (0), hashes_count (0), header_data_len (data_len)
  99     {};
 100
 101 #ifndef NDEBUG
 102     void print(raw_ostream &O) {
 103       O << "Magic: " << format("0x%x", magic) << "\n"
 104         << "Version: " << version << "\n"
 105         << "Hash Function: " << hash_function << "\n"
 106         << "Bucket Count: " << bucket_count << "\n"
 107         << "Header Data Length: " << header_data_len << "\n";
 108     }
 109     void dump() { print(dbgs()); }
 110 #endif
 111   };
 112
 113 public:
 114   // The HeaderData describes the form of each set of data. In general this
 115   // is as a list of atoms (atom_count) where each atom contains a type
 116   // (AtomType type) of data, and an encoding form (form). In the case of
 117   // data that is referenced via DW_FORM_ref_* the die_offset_base is
 118   // used to describe the offset for all forms in the list of atoms.
 119   // This also serves as a public interface of sorts.
 120   // When written to disk this will have the form:
 121   //
 122   // uint32_t die_offset_base
 123   // uint32_t atom_count
 124   // atom_count Atoms
 125   enum AtomType {
 126     eAtomTypeNULL       = 0u,
 127     eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
 128     eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that
 129                                 // contains the item in question
 130     eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as
 131                                 // DW_FORM_data1 (if no tags exceed 255) or
 132                                 // DW_FORM_data2.
 133     eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
 134     eAtomTypeTypeFlags  = 5u    // Flags from enum TypeFlags
 135   };
 136
 137   // Make these public so that they can be used as a general interface to
 138   // the class.
 139   struct Atom {
 140     AtomType type; // enum AtomType
 141     uint16_t form; // DWARF DW_FORM_ defines
 142
 143     Atom(AtomType type, uint16_t form) : type(type), form(form) {};
 144     static const char * AtomTypeString(enum AtomType);
 145 #ifndef NDEBUG
 146     void print(raw_ostream &O) {
 147       O << "Type: " << dwarf::TagString(type) << "\n"
 148         << "Form: " << dwarf::FormEncodingString(form) << "\n";
 149     }
 150     void dump() {
 151       print(dbgs());
 152     }
 153 #endif
 154   };
 155
 156  private:
 157   struct TableHeaderData {
 158
 159     uint32_t die_offset_base;
 160     std::vector<Atom> Atoms;
 161
 162     TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
 163     : die_offset_base(offset) {
 164       Atoms.push_back(Atom);
 165     }
 166
 167 #ifndef NDEBUG
 168     void print (raw_ostream &O) {
 169       O << "die_offset_base: " << die_offset_base << "\n";
 170       for (size_t i = 0; i < Atoms.size(); i++)
 171         Atoms[i].print(O);
 172     }
 173     void dump() {
 174       print(dbgs());
 175     }
 176 #endif
 177   };
 178
 179   // The data itself consists of a str_offset (to deal with collisions in
 180   // some magical way? this looks like the KeyType from the spec, which
 181   // should mean an integer compare on read), a count of the DIEs in the
 182   // hash and the offsets to the DIEs themselves.
 183   // On disk each data section is ended with a 0 KeyType as the end of the
 184   // hash chain.
 185   // On output this looks like:
 186   // uint32_t str_offset
 187   // uint32_t hash_data_count
 188   // HashData[hash_data_count]
 189   struct HashData {
 190     StringRef Str;
 191     uint32_t HashValue;
 192     MCSymbol *Sym;
 193     std::vector<uint32_t> DIEOffsets; // offsets
 194     HashData(StringRef S) : Str(S) {
 195       HashValue = DwarfAccelTable::HashDJB(S.str().c_str());
 196     }
 197     void addOffset(uint32_t off) { DIEOffsets.push_back(off); }
 198     #ifndef NDEBUG
 199     void print(raw_ostream &O) {
 200       O << "Name: " << Str << "\n";
 201       O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
 202       O << "  Symbol: " ;
 203       if (Sym) Sym->print(O);
 204       else O << "<none>";
 205       O << "\n";
 206       for (size_t i = 0; i < DIEOffsets.size(); i++)
 207         O << "  Offset: " << DIEOffsets[i] << "\n";
 208     }
 209     void dump() {
 210       print(dbgs());
 211     }
 212     #endif
 213   };
 214
 215   DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
 216   void operator=(const DwarfAccelTable&);  // DO NOT IMPLEMENT
 217
 218   // Internal Functions
 219   void EmitHeader(AsmPrinter *);
 220   void EmitBuckets(AsmPrinter *);
 221   void EmitHashes(AsmPrinter *);
 222   void EmitOffsets(AsmPrinter *, MCSymbol *);
 223   void EmitData(AsmPrinter *, DwarfDebug *D);
 224
 225   // Output Variables
 226   TableHeader Header;
 227   TableHeaderData HeaderData;
 228   std::vector<HashData*> Data;
 229
 230   // String Data
 231   typedef std::vector<DIE*> DIEArray;
 232   typedef StringMap<DIEArray> StringEntries;
 233   StringEntries Entries;
 234
 235   // Buckets/Hashes/Offsets
 236   typedef std::vector<HashData*> HashList;
 237   typedef std::vector<HashList> BucketList;
 238   BucketList Buckets;
 239   HashList Hashes;
 240
 241   // Public Implementation
 242  public:
 243   DwarfAccelTable(DwarfAccelTable::Atom Atom);
 244   void AddName(StringRef, DIE*);
 245   void FinalizeTable(AsmPrinter *, const char *);
 246   void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
 247 #ifndef NDEBUG
 248   void print(raw_ostream &O);
 249   void dump() { print(dbgs()); }
 250 #endif
 251 };
 252
 253 }
 254 #endif