X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FRegex.cpp;h=f7fe1e4c7925d635f82e7fa6fb4c016fb233550f;hb=f15492fd7292563049ace40be9a2e0048e64b8f0;hp=f8b2446e3cf2ce82c84894ebe37756e1ec7a6133;hpb=ce0c81e7dd321e9f94f628daa5528f56cab0ab88;p=oota-llvm.git diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index f8b2446e3cf..f7fe1e4c792 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -10,60 +10,57 @@ // This file implements a POSIX regular expression matcher. // //===----------------------------------------------------------------------===// + #include "llvm/Support/Regex.h" +#include "regex_impl.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "regex_impl.h" #include - using namespace llvm; -Regex::Regex(const StringRef ®ex, unsigned Flags) -{ + +Regex::Regex(StringRef regex, unsigned Flags) { unsigned flags = 0; - preg = new struct llvm_regex; + preg = new llvm_regex(); preg->re_endp = regex.end(); if (Flags & IgnoreCase) flags |= REG_ICASE; - if (Flags & NoSub) { - flags |= REG_NOSUB; - sub = false; - } else { - sub = true; - } if (Flags & Newline) flags |= REG_NEWLINE; - error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND); + if (!(Flags & BasicRegex)) + flags |= REG_EXTENDED; + error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); } -bool Regex::isValid(std::string &Error) -{ +Regex::~Regex() { + if (preg) { + llvm_regfree(preg); + delete preg; + } +} + +bool Regex::isValid(std::string &Error) { if (!error) return true; - - size_t len = llvm_regerror(error, preg, NULL, 0); - char *errbuff = new char[len]; - llvm_regerror(error, preg, errbuff, len); - Error.assign(errbuff); + + size_t len = llvm_regerror(error, preg, nullptr, 0); + + Error.resize(len - 1); + llvm_regerror(error, preg, &Error[0], len); return false; } -Regex::~Regex() -{ - llvm_regfree(preg); - delete preg; +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { + return preg->re_nsub; } -bool Regex::match(const StringRef &String, SmallVectorImpl *Matches) -{ +bool Regex::match(StringRef String, SmallVectorImpl *Matches){ unsigned nmatch = Matches ? preg->re_nsub+1 : 0; - if (Matches) { - assert(sub && "Substring matching requested but pattern compiled without"); - Matches->clear(); - } - // pmatch needs to have at least one element. - SmallVector pm; + SmallVector pm; pm.resize(nmatch > 0 ? nmatch : 1); pm[0].rm_so = 0; pm[0].rm_eo = String.size(); @@ -81,13 +78,15 @@ bool Regex::match(const StringRef &String, SmallVectorImpl *Matches) // There was a match. if (Matches) { // match position requested - for (unsigned i=0;iclear(); + + for (unsigned i = 0; i != nmatch; ++i) { if (pm[i].rm_so == -1) { // this group didn't match Matches->push_back(StringRef()); continue; } - assert(pm[i].rm_eo > pm[i].rm_so); + assert(pm[i].rm_eo >= pm[i].rm_so); Matches->push_back(StringRef(String.data()+pm[i].rm_so, pm[i].rm_eo-pm[i].rm_so)); } @@ -95,3 +94,100 @@ bool Regex::match(const StringRef &String, SmallVectorImpl *Matches) return true; } + +std::string Regex::sub(StringRef Repl, StringRef String, + std::string *Error) { + SmallVector Matches; + + // Reset error, if given. + if (Error && !Error->empty()) *Error = ""; + + // Return the input if there was no match. + if (!match(String, &Matches)) + return String; + + // Otherwise splice in the replacement string, starting with the prefix before + // the match. + std::string Res(String.begin(), Matches[0].begin()); + + // Then the replacement string, honoring possible substitutions. + while (!Repl.empty()) { + // Skip to the next escape. + std::pair Split = Repl.split('\\'); + + // Add the skipped substring. + Res += Split.first; + + // Check for terminimation and trailing backslash. + if (Split.second.empty()) { + if (Repl.size() != Split.first.size() && + Error && Error->empty()) + *Error = "replacement string contained trailing backslash"; + break; + } + + // Otherwise update the replacement string and interpret escapes. + Repl = Split.second; + + // FIXME: We should have a StringExtras function for mapping C99 escapes. + switch (Repl[0]) { + // Treat all unrecognized characters as self-quoting. + default: + Res += Repl[0]; + Repl = Repl.substr(1); + break; + + // Single character escapes. + case 't': + Res += '\t'; + Repl = Repl.substr(1); + break; + case 'n': + Res += '\n'; + Repl = Repl.substr(1); + break; + + // Decimal escapes are backreferences. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + // Extract the backreference number. + StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); + Repl = Repl.substr(Ref.size()); + + unsigned RefValue; + if (!Ref.getAsInteger(10, RefValue) && + RefValue < Matches.size()) + Res += Matches[RefValue]; + else if (Error && Error->empty()) + *Error = "invalid backreference string '" + Ref.str() + "'"; + break; + } + } + } + + // And finally the suffix. + Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + + return Res; +} + +// These are the special characters matched in functions like "p_ere_exp". +static const char RegexMetachars[] = "()^$|*+?.[]\\{}"; + +bool Regex::isLiteralERE(StringRef Str) { + // Check for regex metacharacters. This list was derived from our regex + // implementation in regcomp.c and double checked against the POSIX extended + // regular expression specification. + return Str.find_first_of(RegexMetachars) == StringRef::npos; +} + +std::string Regex::escape(StringRef String) { + std::string RegexStr; + for (unsigned i = 0, e = String.size(); i != e; ++i) { + if (strchr(RegexMetachars, String[i])) + RegexStr += '\\'; + RegexStr += String[i]; + } + + return RegexStr; +}