lib/Support/StringRef.cpp

   1 //===-- StringRef.cpp - Lightweight String References ---------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/ADT/StringRef.h"
  11 #include "llvm/ADT/APInt.h"
  12
  13 using namespace llvm;
  14
  15 // MSVC emits references to this into the translation units which reference it.
  16 #ifndef _MSC_VER
  17 const size_t StringRef::npos;
  18 #endif
  19
  20 static char ascii_tolower(char x) {
  21   if (x >= 'A' && x <= 'Z')
  22     return x - 'A' + 'a';
  23   return x;
  24 }
  25
  26 /// compare_lower - Compare strings, ignoring case.
  27 int StringRef::compare_lower(StringRef RHS) const {
  28   for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
  29     char LHC = ascii_tolower(Data[I]);
  30     char RHC = ascii_tolower(RHS.Data[I]);
  31     if (LHC != RHC)
  32       return LHC < RHC ? -1 : 1;
  33   }
  34
  35   if (Length == RHS.Length)
  36         return 0;
  37   return Length < RHS.Length ? -1 : 1;
  38 }
  39
  40 // Compute the edit distance between the two given strings.
  41 unsigned StringRef::edit_distance(llvm::StringRef Other,
  42                                   bool AllowReplacements) {
  43   // The algorithm implemented below is the "classic"
  44   // dynamic-programming algorithm for computing the Levenshtein
  45   // distance, which is described here:
  46   //
  47   //   http://en.wikipedia.org/wiki/Levenshtein_distance
  48   //
  49   // Although the algorithm is typically described using an m x n
  50   // array, only two rows are used at a time, so this implemenation
  51   // just keeps two separate vectors for those two rows.
  52   size_type m = size();
  53   size_type n = Other.size();
  54
  55   const unsigned SmallBufferSize = 64;
  56   unsigned SmallBuffer[SmallBufferSize];
  57   unsigned *Allocated = 0;
  58   unsigned *previous = SmallBuffer;
  59   if (2*(n + 1) > SmallBufferSize)
  60     Allocated = previous = new unsigned [2*(n+1)];
  61   unsigned *current = previous + (n + 1);
  62
  63   for (unsigned i = 0; i <= n; ++i)
  64     previous[i] = i;
  65
  66   for (size_type y = 1; y <= m; ++y) {
  67     current[0] = y;
  68     for (size_type x = 1; x <= n; ++x) {
  69       if (AllowReplacements) {
  70         current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
  71                          min(current[x-1], previous[x])+1);
  72       }
  73       else {
  74         if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
  75         else current[x] = min(current[x-1], previous[x]) + 1;
  76       }
  77     }
  78
  79     unsigned *tmp = current;
  80     current = previous;
  81     previous = tmp;
  82   }
  83
  84   unsigned Result = previous[n];
  85   delete [] Allocated;
  86
  87   return Result;
  88 }
  89
  90 //===----------------------------------------------------------------------===//
  91 // String Searching
  92 //===----------------------------------------------------------------------===//
  93
  94
  95 /// find - Search for the first string \arg Str in the string.
  96 ///
  97 /// \return - The index of the first occurence of \arg Str, or npos if not
  98 /// found.
  99 size_t StringRef::find(StringRef Str, size_t From) const {
 100   size_t N = Str.size();
 101   if (N > Length)
 102     return npos;
 103   for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
 104     if (substr(i, N).equals(Str))
 105       return i;
 106   return npos;
 107 }
 108
 109 /// rfind - Search for the last string \arg Str in the string.
 110 ///
 111 /// \return - The index of the last occurence of \arg Str, or npos if not
 112 /// found.
 113 size_t StringRef::rfind(StringRef Str) const {
 114   size_t N = Str.size();
 115   if (N > Length)
 116     return npos;
 117   for (size_t i = Length - N + 1, e = 0; i != e;) {
 118     --i;
 119     if (substr(i, N).equals(Str))
 120       return i;
 121   }
 122   return npos;
 123 }
 124
 125 /// find_first_of - Find the first character in the string that is in \arg
 126 /// Chars, or npos if not found.
 127 ///
 128 /// Note: O(size() * Chars.size())
 129 StringRef::size_type StringRef::find_first_of(StringRef Chars,
 130                                               size_t From) const {
 131   for (size_type i = min(From, Length), e = Length; i != e; ++i)
 132     if (Chars.find(Data[i]) != npos)
 133       return i;
 134   return npos;
 135 }
 136
 137 /// find_first_not_of - Find the first character in the string that is not
 138 /// \arg C or npos if not found.
 139 StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
 140   for (size_type i = min(From, Length), e = Length; i != e; ++i)
 141     if (Data[i] != C)
 142       return i;
 143   return npos;
 144 }
 145
 146 /// find_first_not_of - Find the first character in the string that is not
 147 /// in the string \arg Chars, or npos if not found.
 148 ///
 149 /// Note: O(size() * Chars.size())
 150 StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
 151                                                   size_t From) const {
 152   for (size_type i = min(From, Length), e = Length; i != e; ++i)
 153     if (Chars.find(Data[i]) == npos)
 154       return i;
 155   return npos;
 156 }
 157
 158
 159 //===----------------------------------------------------------------------===//
 160 // Helpful Algorithms
 161 //===----------------------------------------------------------------------===//
 162
 163 /// count - Return the number of non-overlapped occurrences of \arg Str in
 164 /// the string.
 165 size_t StringRef::count(StringRef Str) const {
 166   size_t Count = 0;
 167   size_t N = Str.size();
 168   if (N > Length)
 169     return 0;
 170   for (size_t i = 0, e = Length - N + 1; i != e; ++i)
 171     if (substr(i, N).equals(Str))
 172       ++Count;
 173   return Count;
 174 }
 175
 176 static unsigned GetAutoSenseRadix(StringRef &Str) {
 177   if (Str.startswith("0x")) {
 178     Str = Str.substr(2);
 179     return 16;
 180   } else if (Str.startswith("0b")) {
 181     Str = Str.substr(2);
 182     return 2;
 183   } else if (Str.startswith("0")) {
 184     return 8;
 185   } else {
 186     return 10;
 187   }
 188 }
 189
 190
 191 /// GetAsUnsignedInteger - Workhorse method that converts a integer character
 192 /// sequence of radix up to 36 to an unsigned long long value.
 193 static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
 194                                  unsigned long long &Result) {
 195   // Autosense radix if not specified.
 196   if (Radix == 0)
 197     Radix = GetAutoSenseRadix(Str);
 198
 199   // Empty strings (after the radix autosense) are invalid.
 200   if (Str.empty()) return true;
 201
 202   // Parse all the bytes of the string given this radix.  Watch for overflow.
 203   Result = 0;
 204   while (!Str.empty()) {
 205     unsigned CharVal;
 206     if (Str[0] >= '0' && Str[0] <= '9')
 207       CharVal = Str[0]-'0';
 208     else if (Str[0] >= 'a' && Str[0] <= 'z')
 209       CharVal = Str[0]-'a'+10;
 210     else if (Str[0] >= 'A' && Str[0] <= 'Z')
 211       CharVal = Str[0]-'A'+10;
 212     else
 213       return true;
 214
 215     // If the parsed value is larger than the integer radix, the string is
 216     // invalid.
 217     if (CharVal >= Radix)
 218       return true;
 219
 220     // Add in this character.
 221     unsigned long long PrevResult = Result;
 222     Result = Result*Radix+CharVal;
 223
 224     // Check for overflow.
 225     if (Result < PrevResult)
 226       return true;
 227
 228     Str = Str.substr(1);
 229   }
 230
 231   return false;
 232 }
 233
 234 bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
 235   return GetAsUnsignedInteger(*this, Radix, Result);
 236 }
 237
 238
 239 bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
 240   unsigned long long ULLVal;
 241
 242   // Handle positive strings first.
 243   if (empty() || front() != '-') {
 244     if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
 245         // Check for value so large it overflows a signed value.
 246         (long long)ULLVal < 0)
 247       return true;
 248     Result = ULLVal;
 249     return false;
 250   }
 251
 252   // Get the positive part of the value.
 253   if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
 254       // Reject values so large they'd overflow as negative signed, but allow
 255       // "-0".  This negates the unsigned so that the negative isn't undefined
 256       // on signed overflow.
 257       (long long)-ULLVal > 0)
 258     return true;
 259
 260   Result = -ULLVal;
 261   return false;
 262 }
 263
 264 bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
 265   long long Val;
 266   if (getAsInteger(Radix, Val) ||
 267       (int)Val != Val)
 268     return true;
 269   Result = Val;
 270   return false;
 271 }
 272
 273 bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
 274   unsigned long long Val;
 275   if (getAsInteger(Radix, Val) ||
 276       (unsigned)Val != Val)
 277     return true;
 278   Result = Val;
 279   return false;
 280 }
 281
 282 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
 283   StringRef Str = *this;
 284
 285   // Autosense radix if not specified.
 286   if (Radix == 0)
 287     Radix = GetAutoSenseRadix(Str);
 288
 289   assert(Radix > 1 && Radix <= 36);
 290
 291   // Empty strings (after the radix autosense) are invalid.
 292   if (Str.empty()) return true;
 293
 294   // Skip leading zeroes.  This can be a significant improvement if
 295   // it means we don't need > 64 bits.
 296   while (!Str.empty() && Str.front() == '0')
 297     Str = Str.substr(1);
 298
 299   // If it was nothing but zeroes....
 300   if (Str.empty()) {
 301     Result = APInt(64, 0);
 302     return false;
 303   }
 304
 305   // (Over-)estimate the required number of bits.
 306   unsigned Log2Radix = 0;
 307   while ((1U << Log2Radix) < Radix) Log2Radix++;
 308   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
 309
 310   unsigned BitWidth = Log2Radix * Str.size();
 311   if (BitWidth < Result.getBitWidth())
 312     BitWidth = Result.getBitWidth(); // don't shrink the result
 313   else
 314     Result.zext(BitWidth);
 315
 316   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
 317   if (!IsPowerOf2Radix) {
 318     // These must have the same bit-width as Result.
 319     RadixAP = APInt(BitWidth, Radix);
 320     CharAP = APInt(BitWidth, 0);
 321   }
 322
 323   // Parse all the bytes of the string given this radix.
 324   Result = 0;
 325   while (!Str.empty()) {
 326     unsigned CharVal;
 327     if (Str[0] >= '0' && Str[0] <= '9')
 328       CharVal = Str[0]-'0';
 329     else if (Str[0] >= 'a' && Str[0] <= 'z')
 330       CharVal = Str[0]-'a'+10;
 331     else if (Str[0] >= 'A' && Str[0] <= 'Z')
 332       CharVal = Str[0]-'A'+10;
 333     else
 334       return true;
 335
 336     // If the parsed value is larger than the integer radix, the string is
 337     // invalid.
 338     if (CharVal >= Radix)
 339       return true;
 340
 341     // Add in this character.
 342     if (IsPowerOf2Radix) {
 343       Result <<= Log2Radix;
 344       Result |= CharVal;
 345     } else {
 346       Result *= RadixAP;
 347       CharAP = CharVal;
 348       Result += CharAP;
 349     }
 350
 351     Str = Str.substr(1);
 352   }
 353
 354   return false;
 355 }