X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FStringRef.cpp;h=d7a0bfa410053389157e9673fc4275c2575be73d;hb=814afe91ccad0e5e1f767303d780fa0318fa5212;hp=c78b6d0afc83b85de9d795b231ce1aa7020c47de;hpb=589fbb1770df5f7bee1c5e24e9e8f4ca5091d528;p=oota-llvm.git diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index c78b6d0afc8..d7a0bfa4100 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -9,7 +9,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/edit_distance.h" #include using namespace llvm; @@ -84,57 +86,10 @@ int StringRef::compare_numeric(StringRef RHS) const { unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) { - // The algorithm implemented below is the "classic" - // dynamic-programming algorithm for computing the Levenshtein - // distance, which is described here: - // - // http://en.wikipedia.org/wiki/Levenshtein_distance - // - // Although the algorithm is typically described using an m x n - // array, only two rows are used at a time, so this implemenation - // just keeps two separate vectors for those two rows. - size_type m = size(); - size_type n = Other.size(); - - const unsigned SmallBufferSize = 64; - unsigned SmallBuffer[SmallBufferSize]; - llvm::OwningArrayPtr Allocated; - unsigned *previous = SmallBuffer; - if (2*(n + 1) > SmallBufferSize) { - previous = new unsigned [2*(n+1)]; - Allocated.reset(previous); - } - unsigned *current = previous + (n + 1); - - for (unsigned i = 0; i <= n; ++i) - previous[i] = i; - - for (size_type y = 1; y <= m; ++y) { - current[0] = y; - unsigned BestThisRow = current[0]; - - for (size_type x = 1; x <= n; ++x) { - if (AllowReplacements) { - current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), - min(current[x-1], previous[x])+1); - } - else { - if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; - else current[x] = min(current[x-1], previous[x]) + 1; - } - BestThisRow = min(BestThisRow, current[x]); - } - - if (MaxEditDistance && BestThisRow > MaxEditDistance) - return MaxEditDistance + 1; - - unsigned *tmp = current; - current = previous; - previous = tmp; - } - - unsigned Result = previous[n]; - return Result; + return llvm::ComputeEditDistance( + llvm::ArrayRef(data(), size()), + llvm::ArrayRef(Other.data(), Other.size()), + AllowReplacements, MaxEditDistance); } //===----------------------------------------------------------------------===// @@ -152,7 +107,7 @@ std::string StringRef::lower() const { std::string StringRef::upper() const { std::string Result(size(), char()); for (size_type i = 0, e = size(); i != e; ++i) { - Result[i] = ascii_tolower(Data[i]); + Result[i] = ascii_toupper(Data[i]); } return Result; } @@ -275,6 +230,52 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars, return npos; } +/// find_last_not_of - Find the last character in the string that is not +/// \arg C, or npos if not found. +StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { + for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_last_not_of - Find the last character in the string that is not in +/// \arg Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0, e = Chars.size(); i != e; ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +void StringRef::split(SmallVectorImpl &A, + StringRef Separators, int MaxSplit, + bool KeepEmpty) const { + StringRef rest = *this; + + // rest.data() is used to distinguish cases like "a," that splits into + // "a" + "" and "a" that splits into "a" + 0. + for (int splits = 0; + rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit); + ++splits) { + std::pair p = rest.split(Separators); + + if (KeepEmpty || p.first.size() != 0) + A.push_back(p.first); + rest = p.second; + } + // If we have a tail left, add it. + if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty)) + A.push_back(rest); +} + //===----------------------------------------------------------------------===// // Helpful Algorithms //===----------------------------------------------------------------------===// @@ -296,21 +297,29 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { if (Str.startswith("0x")) { Str = Str.substr(2); return 16; - } else if (Str.startswith("0b")) { + } + + if (Str.startswith("0b")) { Str = Str.substr(2); return 2; - } else if (Str.startswith("0")) { + } + + if (Str.startswith("0o")) { + Str = Str.substr(2); return 8; - } else { - return 10; } + + if (Str.startswith("0")) + return 8; + + return 10; } /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. -static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, - unsigned long long &Result) { +bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { // Autosense radix if not specified. if (Radix == 0) Radix = GetAutoSenseRadix(Str); @@ -340,8 +349,8 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long PrevResult = Result; Result = Result*Radix+CharVal; - // Check for overflow. - if (Result < PrevResult) + // Check for overflow by shifting back and seeing if bits were lost. + if (Result/Radix < PrevResult) return true; Str = Str.substr(1); @@ -350,17 +359,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, return false; } -bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { - return GetAsUnsignedInteger(*this, Radix, Result); -} - - -bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { +bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, + long long &Result) { unsigned long long ULLVal; // Handle positive strings first. - if (empty() || front() != '-') { - if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + if (Str.empty() || Str.front() != '-') { + if (getAsUnsignedInteger(Str, Radix, ULLVal) || // Check for value so large it overflows a signed value. (long long)ULLVal < 0) return true; @@ -369,7 +374,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { } // Get the positive part of the value. - if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || // Reject values so large they'd overflow as negative signed, but allow // "-0". This negates the unsigned so that the negative isn't undefined // on signed overflow. @@ -380,24 +385,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { return false; } -bool StringRef::getAsInteger(unsigned Radix, int &Result) const { - long long Val; - if (getAsInteger(Radix, Val) || - (int)Val != Val) - return true; - Result = Val; - return false; -} - -bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { - unsigned long long Val; - if (getAsInteger(Radix, Val) || - (unsigned)Val != Val) - return true; - Result = Val; - return false; -} - bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { StringRef Str = *this; @@ -429,7 +416,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { unsigned BitWidth = Log2Radix * Str.size(); if (BitWidth < Result.getBitWidth()) BitWidth = Result.getBitWidth(); // don't shrink the result - else + else if (BitWidth > Result.getBitWidth()) Result = Result.zext(BitWidth); APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix @@ -472,3 +459,9 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { return false; } + + +// Implementation of StringRef hashing. +hash_code llvm::hash_value(StringRef S) { + return hash_combine_range(S.begin(), S.end()); +}