From: Chris Lattner Date: Fri, 25 Sep 2009 17:23:43 +0000 (+0000) Subject: reimplement the regex matching strategy by building a single X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5d6a05f4d4faea0c0c96fbf2bb57655df2839b34;p=oota-llvm.git reimplement the regex matching strategy by building a single regex and matching it instead of trying to match chunks at a time. Matching chunks at a time broke with check lines like CHECK: foo {{.*}}bar because the .* would eat the entire rest of the line and bar would never match. Now we just escape the fixed strings for the user, so that something like: CHECK: a() {{.*}}??? is matched as: CHECK: {{a\(\) .*\?\?\?}} transparently "under the covers". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82779 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index 86ecbe5ec7e..af263f95b8d 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -59,10 +59,10 @@ bb12: ; X64: test4: ; X64: notl %eax -; X64: andl {{.*%eax}} +; X64: andl {{.*}}%eax ; X32: test4: ; X32: notl %edx -; X32: andl {{.*%edx}} +; X32: andl {{.*}}%edx } define i16 @test5(i16 %a, i16 %b) nounwind { @@ -81,10 +81,10 @@ bb12: ret i16 %tmp3 ; X64: test5: ; X64: notw %ax -; X64: andw {{.*%ax}} +; X64: andw {{.*}}%ax ; X32: test5: ; X32: notw %dx -; X32: andw {{.*%dx}} +; X32: andw {{.*}}%dx } define i8 @test6(i8 %a, i8 %b) nounwind { @@ -103,10 +103,10 @@ bb12: ret i8 %tmp3 ; X64: test6: ; X64: notb %al -; X64: andb {{.*%al}} +; X64: andb {{.*}}%al ; X32: test6: ; X32: notb %dl -; X32: andb {{.*%dl}} +; X32: andb {{.*}}%dl } define i32 @test7(i32 %a, i32 %b) nounwind { @@ -125,9 +125,9 @@ bb12: ret i32 %tmp3 ; X64: test7: ; X64: xorl $2147483646, %eax -; X64: andl {{.*%eax}} +; X64: andl {{.*}}%eax ; X32: test7: ; X32: xorl $2147483646, %edx -; X32: andl {{.*%edx}} +; X32: andl {{.*}}%edx } diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 061d321b903..1e6af371d8e 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -44,39 +44,13 @@ NoCanonicalizeWhiteSpace("strict-whitespace", // Pattern Handling Code. //===----------------------------------------------------------------------===// -class PatternChunk { - StringRef Str; - bool isRegEx; -public: - PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {} - - size_t Match(StringRef Buffer, size_t &MatchLen) const { - if (!isRegEx) { - // Fixed string match. - MatchLen = Str.size(); - return Buffer.find(Str); - } - - // Regex match. - SmallVector MatchInfo; - if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo)) - return StringRef::npos; - - // Successful regex match. - assert(!MatchInfo.empty() && "Didn't get any match"); - StringRef FullMatch = MatchInfo[0]; - - MatchLen = FullMatch.size(); - return FullMatch.data()-Buffer.data(); - } -}; - class Pattern { - /// Chunks - The pattern chunks to match. If the bool is false, it is a fixed - /// string match, if it is true, it is a regex match. - SmallVector Chunks; - + /// FixedStr - If non-empty, this pattern is a fixed string match with the + /// specified fixed string. StringRef FixedStr; + + /// RegEx - If non-empty, this is a regex pattern. + std::string RegExStr; public: Pattern() { } @@ -87,6 +61,9 @@ public: /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. size_t Match(StringRef Buffer, size_t &MatchLen) const; + +private: + void AddFixedStringToRegEx(StringRef FixedStr); }; bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { @@ -109,17 +86,15 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { return false; } - // Otherwise, there is at least one regex piece. - - // Scan the pattern to break it into regex and non-regex pieces. + // Otherwise, there is at least one regex piece. Build up the regex pattern + // by escaping scary characters in fixed strings, building up one big regex. while (!PatternStr.empty()) { // Handle fixed string matches. if (PatternStr.size() < 2 || PatternStr[0] != '{' || PatternStr[1] != '{') { // Find the end, which is the start of the next regex. size_t FixedMatchEnd = PatternStr.find("{{"); - - Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false)); + AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd)); PatternStr = PatternStr.substr(FixedMatchEnd); continue; } @@ -132,7 +107,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { return true; } - Regex R(PatternStr.substr(2, End-2)); + StringRef RegexStr = PatternStr.substr(2, End-2); + Regex R(RegexStr); std::string Error; if (!R.isValid(Error)) { SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2), @@ -140,13 +116,41 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { return true; } - Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true)); + RegExStr += RegexStr.str(); PatternStr = PatternStr.substr(End+2); } return false; } +void Pattern::AddFixedStringToRegEx(StringRef FixedStr) { + // Add the characters from FixedStr to the regex, escaping as needed. This + // avoids "leaning toothpicks" in common patterns. + for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) { + switch (FixedStr[i]) { + // These are the special characters matched in "p_ere_exp". + case '(': + case ')': + case '^': + case '$': + case '|': + case '*': + case '+': + case '?': + case '.': + case '[': + case '\\': + case '{': + RegExStr += '\\'; + // FALL THROUGH. + default: + RegExStr += FixedStr[i]; + break; + } + } +} + + /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. @@ -157,58 +161,17 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const { return Buffer.find(FixedStr); } - size_t FirstMatch = StringRef::npos; - MatchLen = 0; + // Regex match. + SmallVector MatchInfo; + if (!Regex(RegExStr, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo)) + return StringRef::npos; - while (!Buffer.empty()) { - StringRef MatchAttempt = Buffer; - - unsigned ChunkNo = 0, e = Chunks.size(); - for (; ChunkNo != e; ++ChunkNo) { - size_t ThisMatch, ThisLength = StringRef::npos; - ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength); - - // Otherwise, what we do depends on if this is the first match or not. If - // this is the first match, it doesn't match to match at the start of - // MatchAttempt. - if (ChunkNo == 0) { - // If the first match fails then this pattern will never match in - // Buffer. - if (ThisMatch == StringRef::npos) - return ThisMatch; - - FirstMatch = ThisMatch; - MatchAttempt = MatchAttempt.substr(FirstMatch); - ThisMatch = 0; - } - - // If this chunk didn't match, then the entire pattern didn't match from - // FirstMatch, try later in the buffer. - if (ThisMatch == StringRef::npos) - break; - - // Ok, if the match didn't match at the beginning of MatchAttempt, then we - // have something like "ABC{{DEF}} and something was in-between. Reject - // the match. - if (ThisMatch != 0) - break; - - // Otherwise, match the string and move to the next chunk. - MatchLen += ThisLength; - MatchAttempt = MatchAttempt.substr(ThisLength); - } - - // If the whole thing matched, we win. - if (ChunkNo == e) - return FirstMatch; - - // Otherwise, try matching again after FirstMatch to see if this pattern - // matches later in the buffer. - Buffer = Buffer.substr(FirstMatch+1); - } + // Successful regex match. + assert(!MatchInfo.empty() && "Didn't get any match"); + StringRef FullMatch = MatchInfo[0]; - // If we ran out of stuff to scan, then we didn't match. - return StringRef::npos; + MatchLen = FullMatch.size(); + return FullMatch.data()-Buffer.data(); }