1 //===-- StringRef.cpp - Lightweight String References ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/APInt.h"
16 // MSVC emits references to this into the translation units which reference it.
18 const size_t StringRef::npos;
21 static char ascii_tolower(char x) {
22 if (x >= 'A' && x <= 'Z')
27 static bool ascii_isdigit(char x) {
28 return x >= '0' && x <= '9';
31 /// compare_lower - Compare strings, ignoring case.
32 int StringRef::compare_lower(StringRef RHS) const {
33 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
34 char LHC = ascii_tolower(Data[I]);
35 char RHC = ascii_tolower(RHS.Data[I]);
37 return LHC < RHC ? -1 : 1;
40 if (Length == RHS.Length)
42 return Length < RHS.Length ? -1 : 1;
45 /// compare_numeric - Compare strings, handle embedded numbers.
46 int StringRef::compare_numeric(StringRef RHS) const {
47 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
48 if (Data[I] == RHS.Data[I])
50 if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
51 // The longer sequence of numbers is larger. This doesn't really handle
52 // prefixed zeros well.
53 for (size_t J = I+1; J != E+1; ++J) {
54 bool ld = J < Length && ascii_isdigit(Data[J]);
55 bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
62 return Data[I] < RHS.Data[I] ? -1 : 1;
64 if (Length == RHS.Length)
66 return Length < RHS.Length ? -1 : 1;
69 // Compute the edit distance between the two given strings.
70 unsigned StringRef::edit_distance(llvm::StringRef Other,
71 bool AllowReplacements) {
72 // The algorithm implemented below is the "classic"
73 // dynamic-programming algorithm for computing the Levenshtein
74 // distance, which is described here:
76 // http://en.wikipedia.org/wiki/Levenshtein_distance
78 // Although the algorithm is typically described using an m x n
79 // array, only two rows are used at a time, so this implemenation
80 // just keeps two separate vectors for those two rows.
82 size_type n = Other.size();
84 const unsigned SmallBufferSize = 64;
85 unsigned SmallBuffer[SmallBufferSize];
86 unsigned *Allocated = 0;
87 unsigned *previous = SmallBuffer;
88 if (2*(n + 1) > SmallBufferSize)
89 Allocated = previous = new unsigned [2*(n+1)];
90 unsigned *current = previous + (n + 1);
92 for (unsigned i = 0; i <= n; ++i)
95 for (size_type y = 1; y <= m; ++y) {
97 for (size_type x = 1; x <= n; ++x) {
98 if (AllowReplacements) {
99 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
100 min(current[x-1], previous[x])+1);
103 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
104 else current[x] = min(current[x-1], previous[x]) + 1;
108 unsigned *tmp = current;
113 unsigned Result = previous[n];
119 //===----------------------------------------------------------------------===//
121 //===----------------------------------------------------------------------===//
124 /// find - Search for the first string \arg Str in the string.
126 /// \return - The index of the first occurence of \arg Str, or npos if not
128 size_t StringRef::find(StringRef Str, size_t From) const {
129 size_t N = Str.size();
132 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
133 if (substr(i, N).equals(Str))
138 /// rfind - Search for the last string \arg Str in the string.
140 /// \return - The index of the last occurence of \arg Str, or npos if not
142 size_t StringRef::rfind(StringRef Str) const {
143 size_t N = Str.size();
146 for (size_t i = Length - N + 1, e = 0; i != e;) {
148 if (substr(i, N).equals(Str))
154 /// find_first_of - Find the first character in the string that is in \arg
155 /// Chars, or npos if not found.
157 /// Note: O(size() + Chars.size())
158 StringRef::size_type StringRef::find_first_of(StringRef Chars,
160 std::bitset<1 << CHAR_BIT> CharBits;
161 for (size_type i = 0; i != Chars.size(); ++i)
162 CharBits.set((unsigned char)Chars[i]);
164 for (size_type i = min(From, Length), e = Length; i != e; ++i)
165 if (CharBits.test((unsigned char)Data[i]))
170 /// find_first_not_of - Find the first character in the string that is not
171 /// \arg C or npos if not found.
172 StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
173 for (size_type i = min(From, Length), e = Length; i != e; ++i)
179 /// find_first_not_of - Find the first character in the string that is not
180 /// in the string \arg Chars, or npos if not found.
182 /// Note: O(size() + Chars.size())
183 StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
185 std::bitset<1 << CHAR_BIT> CharBits;
186 for (size_type i = 0; i != Chars.size(); ++i)
187 CharBits.set((unsigned char)Chars[i]);
189 for (size_type i = min(From, Length), e = Length; i != e; ++i)
190 if (!CharBits.test((unsigned char)Data[i]))
196 //===----------------------------------------------------------------------===//
197 // Helpful Algorithms
198 //===----------------------------------------------------------------------===//
200 /// count - Return the number of non-overlapped occurrences of \arg Str in
202 size_t StringRef::count(StringRef Str) const {
204 size_t N = Str.size();
207 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
208 if (substr(i, N).equals(Str))
213 static unsigned GetAutoSenseRadix(StringRef &Str) {
214 if (Str.startswith("0x")) {
217 } else if (Str.startswith("0b")) {
220 } else if (Str.startswith("0")) {
228 /// GetAsUnsignedInteger - Workhorse method that converts a integer character
229 /// sequence of radix up to 36 to an unsigned long long value.
230 static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
231 unsigned long long &Result) {
232 // Autosense radix if not specified.
234 Radix = GetAutoSenseRadix(Str);
236 // Empty strings (after the radix autosense) are invalid.
237 if (Str.empty()) return true;
239 // Parse all the bytes of the string given this radix. Watch for overflow.
241 while (!Str.empty()) {
243 if (Str[0] >= '0' && Str[0] <= '9')
244 CharVal = Str[0]-'0';
245 else if (Str[0] >= 'a' && Str[0] <= 'z')
246 CharVal = Str[0]-'a'+10;
247 else if (Str[0] >= 'A' && Str[0] <= 'Z')
248 CharVal = Str[0]-'A'+10;
252 // If the parsed value is larger than the integer radix, the string is
254 if (CharVal >= Radix)
257 // Add in this character.
258 unsigned long long PrevResult = Result;
259 Result = Result*Radix+CharVal;
261 // Check for overflow.
262 if (Result < PrevResult)
271 bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
272 return GetAsUnsignedInteger(*this, Radix, Result);
276 bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
277 unsigned long long ULLVal;
279 // Handle positive strings first.
280 if (empty() || front() != '-') {
281 if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
282 // Check for value so large it overflows a signed value.
283 (long long)ULLVal < 0)
289 // Get the positive part of the value.
290 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
291 // Reject values so large they'd overflow as negative signed, but allow
292 // "-0". This negates the unsigned so that the negative isn't undefined
293 // on signed overflow.
294 (long long)-ULLVal > 0)
301 bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
303 if (getAsInteger(Radix, Val) ||
310 bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
311 unsigned long long Val;
312 if (getAsInteger(Radix, Val) ||
313 (unsigned)Val != Val)
319 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
320 StringRef Str = *this;
322 // Autosense radix if not specified.
324 Radix = GetAutoSenseRadix(Str);
326 assert(Radix > 1 && Radix <= 36);
328 // Empty strings (after the radix autosense) are invalid.
329 if (Str.empty()) return true;
331 // Skip leading zeroes. This can be a significant improvement if
332 // it means we don't need > 64 bits.
333 while (!Str.empty() && Str.front() == '0')
336 // If it was nothing but zeroes....
338 Result = APInt(64, 0);
342 // (Over-)estimate the required number of bits.
343 unsigned Log2Radix = 0;
344 while ((1U << Log2Radix) < Radix) Log2Radix++;
345 bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
347 unsigned BitWidth = Log2Radix * Str.size();
348 if (BitWidth < Result.getBitWidth())
349 BitWidth = Result.getBitWidth(); // don't shrink the result
351 Result.zext(BitWidth);
353 APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
354 if (!IsPowerOf2Radix) {
355 // These must have the same bit-width as Result.
356 RadixAP = APInt(BitWidth, Radix);
357 CharAP = APInt(BitWidth, 0);
360 // Parse all the bytes of the string given this radix.
362 while (!Str.empty()) {
364 if (Str[0] >= '0' && Str[0] <= '9')
365 CharVal = Str[0]-'0';
366 else if (Str[0] >= 'a' && Str[0] <= 'z')
367 CharVal = Str[0]-'a'+10;
368 else if (Str[0] >= 'A' && Str[0] <= 'Z')
369 CharVal = Str[0]-'A'+10;
373 // If the parsed value is larger than the integer radix, the string is
375 if (CharVal >= Radix)
378 // Add in this character.
379 if (IsPowerOf2Radix) {
380 Result <<= Log2Radix;