lib/Support/StringMap.cpp

   1 //===--- StringMap.cpp - String Hash table map implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the StringMap class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/StringMap.h"
  15 #include "llvm/ADT/StringExtras.h"
  16 #include <cassert>
  17 using namespace llvm;
  18
  19 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  20   ItemSize = itemSize;
  21
  22   // If a size is specified, initialize the table with that many buckets.
  23   if (InitSize) {
  24     init(InitSize);
  25     return;
  26   }
  27
  28   // Otherwise, initialize it with zero buckets to avoid the allocation.
  29   TheTable = 0;
  30   NumBuckets = 0;
  31   NumItems = 0;
  32   NumTombstones = 0;
  33 }
  34
  35 void StringMapImpl::init(unsigned InitSize) {
  36   assert((InitSize & (InitSize-1)) == 0 &&
  37          "Init Size must be a power of 2 or zero!");
  38   NumBuckets = InitSize ? InitSize : 16;
  39   NumItems = 0;
  40   NumTombstones = 0;
  41
  42   TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
  43                                            sizeof(StringMapEntryBase **) +
  44                                            sizeof(unsigned));
  45
  46   // Allocate one extra bucket, set it to look filled so the iterators stop at
  47   // end.
  48   TheTable[NumBuckets] = (StringMapEntryBase*)2;
  49 }
  50
  51
  52 /// LookupBucketFor - Look up the bucket that the specified string should end
  53 /// up in.  If it already exists as a key in the map, the Item pointer for the
  54 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
  55 /// case, the FullHashValue field of the bucket will be set to the hash value
  56 /// of the string.
  57 unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
  58   unsigned HTSize = NumBuckets;
  59   if (HTSize == 0) {  // Hash table unallocated so far?
  60     init(16);
  61     HTSize = NumBuckets;
  62   }
  63   unsigned FullHashValue = HashString(Name);
  64   unsigned BucketNo = FullHashValue & (HTSize-1);
  65   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
  66
  67   unsigned ProbeAmt = 1;
  68   int FirstTombstone = -1;
  69   while (1) {
  70     StringMapEntryBase *BucketItem = TheTable[BucketNo];
  71     // If we found an empty bucket, this key isn't in the table yet, return it.
  72     if (BucketItem == 0) {
  73       // If we found a tombstone, we want to reuse the tombstone instead of an
  74       // empty bucket.  This reduces probing.
  75       if (FirstTombstone != -1) {
  76         HashTable[FirstTombstone] = FullHashValue;
  77         return FirstTombstone;
  78       }
  79
  80       HashTable[BucketNo] = FullHashValue;
  81       return BucketNo;
  82     }
  83
  84     if (BucketItem == getTombstoneVal()) {
  85       // Skip over tombstones.  However, remember the first one we see.
  86       if (FirstTombstone == -1) FirstTombstone = BucketNo;
  87     } else if (HashTable[BucketNo] == FullHashValue) {
  88       // If the full hash value matches, check deeply for a match.  The common
  89       // case here is that we are only looking at the buckets (for item info
  90       // being non-null and for the full hash value) not at the items.  This
  91       // is important for cache locality.
  92
  93       // Do the comparison like this because Name isn't necessarily
  94       // null-terminated!
  95       char *ItemStr = (char*)BucketItem+ItemSize;
  96       if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
  97         // We found a match!
  98         return BucketNo;
  99       }
 100     }
 101
 102     // Okay, we didn't find the item.  Probe to the next bucket.
 103     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 104
 105     // Use quadratic probing, it has fewer clumping artifacts than linear
 106     // probing and has good cache behavior in the common case.
 107     ++ProbeAmt;
 108   }
 109 }
 110
 111
 112 /// FindKey - Look up the bucket that contains the specified key. If it exists
 113 /// in the map, return the bucket number of the key.  Otherwise return -1.
 114 /// This does not modify the map.
 115 int StringMapImpl::FindKey(StringRef Key) const {
 116   unsigned HTSize = NumBuckets;
 117   if (HTSize == 0) return -1;  // Really empty table?
 118   unsigned FullHashValue = HashString(Key);
 119   unsigned BucketNo = FullHashValue & (HTSize-1);
 120   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 121
 122   unsigned ProbeAmt = 1;
 123   while (1) {
 124     StringMapEntryBase *BucketItem = TheTable[BucketNo];
 125     // If we found an empty bucket, this key isn't in the table yet, return.
 126     if (BucketItem == 0)
 127       return -1;
 128
 129     if (BucketItem == getTombstoneVal()) {
 130       // Ignore tombstones.
 131     } else if (HashTable[BucketNo] == FullHashValue) {
 132       // If the full hash value matches, check deeply for a match.  The common
 133       // case here is that we are only looking at the buckets (for item info
 134       // being non-null and for the full hash value) not at the items.  This
 135       // is important for cache locality.
 136
 137       // Do the comparison like this because NameStart isn't necessarily
 138       // null-terminated!
 139       char *ItemStr = (char*)BucketItem+ItemSize;
 140       if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
 141         // We found a match!
 142         return BucketNo;
 143       }
 144     }
 145
 146     // Okay, we didn't find the item.  Probe to the next bucket.
 147     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 148
 149     // Use quadratic probing, it has fewer clumping artifacts than linear
 150     // probing and has good cache behavior in the common case.
 151     ++ProbeAmt;
 152   }
 153 }
 154
 155 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
 156 /// delete it.  This aborts if the value isn't in the table.
 157 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 158   const char *VStr = (char*)V + ItemSize;
 159   StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
 160   (void)V2;
 161   assert(V == V2 && "Didn't find key?");
 162 }
 163
 164 /// RemoveKey - Remove the StringMapEntry for the specified key from the
 165 /// table, returning it.  If the key is not in the table, this returns null.
 166 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
 167   int Bucket = FindKey(Key);
 168   if (Bucket == -1) return 0;
 169
 170   StringMapEntryBase *Result = TheTable[Bucket];
 171   TheTable[Bucket] = getTombstoneVal();
 172   --NumItems;
 173   ++NumTombstones;
 174   assert(NumItems + NumTombstones <= NumBuckets);
 175
 176   return Result;
 177 }
 178
 179
 180
 181 /// RehashTable - Grow the table, redistributing values into the buckets with
 182 /// the appropriate mod-of-hashtable-size.
 183 void StringMapImpl::RehashTable() {
 184   unsigned NewSize;
 185   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 186
 187   // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
 188   // the buckets are empty (meaning that many are filled with tombstones),
 189   // grow/rehash the table.
 190   if (NumItems*4 > NumBuckets*3) {
 191     NewSize = NumBuckets*2;
 192   } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) {
 193     NewSize = NumBuckets;
 194   } else {
 195     return;
 196   }
 197
 198   // Allocate one extra bucket which will always be non-empty.  This allows the
 199   // iterators to stop at end.
 200   StringMapEntryBase **NewTableArray =
 201     (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
 202                                              sizeof(unsigned));
 203   unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
 204   NewTableArray[NewSize] = (StringMapEntryBase*)2;
 205
 206   // Rehash all the items into their new buckets.  Luckily :) we already have
 207   // the hash values available, so we don't have to rehash any strings.
 208   for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
 209     StringMapEntryBase *Bucket = TheTable[I];
 210     if (Bucket && Bucket != getTombstoneVal()) {
 211       // Fast case, bucket available.
 212       unsigned FullHash = HashTable[I];
 213       unsigned NewBucket = FullHash & (NewSize-1);
 214       if (NewTableArray[NewBucket] == 0) {
 215         NewTableArray[FullHash & (NewSize-1)] = Bucket;
 216         NewHashArray[FullHash & (NewSize-1)] = FullHash;
 217         continue;
 218       }
 219
 220       // Otherwise probe for a spot.
 221       unsigned ProbeSize = 1;
 222       do {
 223         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
 224       } while (NewTableArray[NewBucket]);
 225
 226       // Finally found a slot.  Fill it in.
 227       NewTableArray[NewBucket] = Bucket;
 228       NewHashArray[NewBucket] = FullHash;
 229     }
 230   }
 231
 232   free(TheTable);
 233
 234   TheTable = NewTableArray;
 235   NumBuckets = NewSize;
 236   NumTombstones = 0;
 237 }