X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=utils%2FFileCheck%2FFileCheck.cpp;h=f2255948658e8847a3c2220cb3baa579c3e7abb4;hb=93a635c82c33e331bd4c5f4f44ba1e37a08bd687;hp=3c4742cc36fbed0cc26619ae93707cacf782fccc;hpb=0806f9ff5860ce351540ff2897dec1667154ca19;p=oota-llvm.git diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 3c4742cc36f..f2255948658 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -16,13 +16,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Signals.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/system_error.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include @@ -49,32 +51,36 @@ NoCanonicalizeWhiteSpace("strict-whitespace", class Pattern { SMLoc PatternLoc; - + + /// MatchEOF - When set, this pattern only matches the end of file. This is + /// used for trailing CHECK-NOTs. + bool MatchEOF; + /// FixedStr - If non-empty, this pattern is a fixed string match with the /// specified fixed string. StringRef FixedStr; - + /// RegEx - If non-empty, this is a regex pattern. std::string RegExStr; - + /// VariableUses - Entries in this vector map to uses of a variable in the /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain /// "foobaz" and we'll get an entry in this vector that tells us to insert the /// value of bar at offset 3. std::vector > VariableUses; - + /// VariableDefs - Entries in this vector map to definitions of a variable in /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The /// index indicates what parenthesized value captures the variable value. std::vector > VariableDefs; - + public: - - Pattern() { } - + + Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { } + bool ParsePattern(StringRef PatternStr, SourceMgr &SM); - + /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. @@ -103,19 +109,19 @@ private: bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { PatternLoc = SMLoc::getFromPointer(PatternStr.data()); - + // Ignore trailing whitespace. while (!PatternStr.empty() && (PatternStr.back() == ' ' || PatternStr.back() == '\t')) PatternStr = PatternStr.substr(0, PatternStr.size()-1); - + // Check that there is something on the line. if (PatternStr.empty()) { SM.PrintMessage(PatternLoc, "found empty check string with prefix '" + CheckPrefix+":'", "error"); return true; } - + // Check to see if this is a fixed string, or if it has regex pieces. if (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && @@ -123,39 +129,46 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { FixedStr = PatternStr; return false; } - + // Paren value #0 is for the fully matched string. Any new parenthesized - // values add from their. + // values add from there. unsigned CurParen = 1; - + // Otherwise, there is at least one regex piece. Build up the regex pattern // by escaping scary characters in fixed strings, building up one big regex. while (!PatternStr.empty()) { // RegEx matches. - if (PatternStr.size() >= 2 && - PatternStr[0] == '{' && PatternStr[1] == '{') { - + if (PatternStr.startswith("{{")) { + // Otherwise, this is the start of a regex match. Scan for the }}. size_t End = PatternStr.find("}}"); if (End == StringRef::npos) { SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), - "found start of regex string with no end '}}'", "error"); + "found start of regex string with no end '}}'","error"); return true; } - + + // Enclose {{}} patterns in parens just like [[]] even though we're not + // capturing the result for any purpose. This is required in case the + // expression contains an alternation like: CHECK: abc{{x|z}}def. We + // want this to turn into: "abc(x|z)def" not "abcx|zdef". + RegExStr += '('; + ++CurParen; + if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) return true; + RegExStr += ')'; + PatternStr = PatternStr.substr(End+2); continue; } - + // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* // (or some other regex) and assigns it to the FileCheck variable 'foo'. The // second form is [[foo]] which is a reference to foo. The variable name // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject // it. This is to catch some common errors. - if (PatternStr.size() >= 2 && - PatternStr[0] == '[' && PatternStr[1] == '[') { + if (PatternStr.startswith("[[")) { // Verify that it is terminated properly. size_t End = PatternStr.find("]]"); if (End == StringRef::npos) { @@ -163,14 +176,14 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { "invalid named regex reference, no ]] found", "error"); return true; } - + StringRef MatchStr = PatternStr.substr(2, End-2); PatternStr = PatternStr.substr(End+2); - + // Get the regex name (e.g. "foo"). size_t NameEnd = MatchStr.find(':'); StringRef Name = MatchStr.substr(0, NameEnd); - + if (Name.empty()) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), "invalid name in named regex: empty name", "error"); @@ -179,39 +192,36 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { // Verify that the name is well formed. for (unsigned i = 0, e = Name.size(); i != e; ++i) - if (Name[i] != '_' && - (Name[i] < 'a' || Name[i] > 'z') && - (Name[i] < 'A' || Name[i] > 'Z') && - (Name[i] < '0' || Name[i] > '9')) { + if (Name[i] != '_' && !isalnum(Name[i])) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), "invalid name in named regex", "error"); return true; } - + // Name can't start with a digit. if (isdigit(Name[0])) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), "invalid name in named regex", "error"); return true; } - + // Handle [[foo]]. if (NameEnd == StringRef::npos) { VariableUses.push_back(std::make_pair(Name, RegExStr.size())); continue; } - + // Handle [[foo:.*]]. VariableDefs.push_back(std::make_pair(Name, CurParen)); RegExStr += '('; ++CurParen; - + if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) return true; RegExStr += ')'; } - + // Handle fixed string matches. // Find the end, which is the start of the next regex. size_t FixedMatchEnd = PatternStr.find("{{"); @@ -260,7 +270,7 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, "invalid regex: " + Error, "error"); return true; } - + RegExStr += RegexStr.str(); CurParen += R.getNumMatches(); return false; @@ -271,6 +281,12 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, /// there is a match, the size of the matched string is returned in MatchLen. size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, StringMap &VariableTable) const { + // If this is the EOF pattern, match it immediately. + if (MatchEOF) { + MatchLen = 0; + return Buffer.size(); + } + // If this is a fixed string pattern, just match it now. if (!FixedStr.empty()) { MatchLen = FixedStr.size(); @@ -278,14 +294,14 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, } // Regex match. - + // If there are variable uses, we need to create a temporary string with the // actual value. StringRef RegExToMatch = RegExStr; std::string TmpStr; if (!VariableUses.empty()) { TmpStr = RegExStr; - + unsigned InsertOffset = 0; for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { StringMap::iterator it = @@ -297,33 +313,33 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, // Look up the value and escape it so that we can plop it into the regex. std::string Value; AddFixedStringToRegEx(it->second, Value); - + // Plop it into the regex at the adjusted offset. TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, Value.begin(), Value.end()); InsertOffset += Value.size(); } - + // Match the newly constructed regex. RegExToMatch = TmpStr; } - - + + SmallVector MatchInfo; if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) return StringRef::npos; - + // Successful regex match. assert(!MatchInfo.empty() && "Didn't get any match"); StringRef FullMatch = MatchInfo[0]; - + // If this defines any variables, remember their values. for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { assert(VariableDefs[i].second < MatchInfo.size() && "Internal paren error"); VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; } - + MatchLen = FullMatch.size(); return FullMatch.data()-Buffer.data(); } @@ -401,11 +417,12 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, } } - if (Best != StringRef::npos && BestQuality < 50) { - // Print the "possible intended match here" line if we found something - // reasonable. - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), - "possible intended match here", "note"); + // Print the "possible intended match here" line if we found something + // reasonable and not equal to what we showed in the "scanning from here" + // line. + if (Best && Best != StringRef::npos && BestQuality < 50) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), + "possible intended match here", "note"); // FIXME: If we wanted to be really friendly we would show why the match // failed, as it can be hard to spot simple one character differences. @@ -420,19 +437,19 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, struct CheckString { /// Pat - The pattern to match. Pattern Pat; - + /// Loc - The location in the match file that the check string was specified. SMLoc Loc; - + /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed /// to a CHECK: directive. bool IsCheckNext; - + /// NotStrings - These are all of the strings that are disallowed from /// occurring between this match string and the previous one (or start of /// file). std::vector > NotStrings; - + CheckString(const Pattern &P, SMLoc L, bool isCheckNext) : Pat(P), Loc(L), IsCheckNext(isCheckNext) {} }; @@ -440,30 +457,33 @@ struct CheckString { /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified /// memory buffer, free it, and return a new one. static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { - SmallVector NewFile; + SmallString<128> NewFile; NewFile.reserve(MB->getBufferSize()); - + for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); Ptr != End; ++Ptr) { + // Eliminate trailing dosish \r. + if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { + continue; + } + // If C is not a horizontal whitespace, skip it. if (*Ptr != ' ' && *Ptr != '\t') { NewFile.push_back(*Ptr); continue; } - + // Otherwise, add one space and advance over neighboring space. NewFile.push_back(' '); while (Ptr+1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) ++Ptr; } - + // Free the old buffer and return a new one. MemoryBuffer *MB2 = - MemoryBuffer::getMemBufferCopy(NewFile.data(), - NewFile.data() + NewFile.size(), - MB->getBufferIdentifier()); - + MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()); + delete MB; return MB2; } @@ -474,41 +494,41 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { static bool ReadCheckFile(SourceMgr &SM, std::vector &CheckStrings) { // Open the check file, and tell SourceMgr about it. - std::string ErrorStr; - MemoryBuffer *F = - MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr); - if (F == 0) { - errs() << "Could not open check file '" << CheckFilename << "': " - << ErrorStr << '\n'; + OwningPtr File; + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) { + errs() << "Could not open check file '" << CheckFilename << "': " + << ec.message() << '\n'; return true; } - + MemoryBuffer *F = File.take(); + // If we want to canonicalize whitespace, strip excess whitespace from the // buffer containing the CHECK lines. if (!NoCanonicalizeWhiteSpace) F = CanonicalizeInputFile(F); - + SM.AddNewSourceBuffer(F, SMLoc()); // Find all instances of CheckPrefix followed by : in the file. StringRef Buffer = F->getBuffer(); std::vector > NotMatches; - + while (1) { // See if Prefix occurs in the memory buffer. Buffer = Buffer.substr(Buffer.find(CheckPrefix)); - + // If we didn't find a match, we're done. if (Buffer.empty()) break; - + const char *CheckPrefixStart = Buffer.data(); - + // When we find a check prefix, keep track of whether we find CHECK: or // CHECK-NEXT: bool IsCheckNext = false, IsCheckNot = false; - + // Verify that the : is present after the prefix. if (Buffer[CheckPrefix.size()] == ':') { Buffer = Buffer.substr(CheckPrefix.size()+1); @@ -524,11 +544,11 @@ static bool ReadCheckFile(SourceMgr &SM, Buffer = Buffer.substr(1); continue; } - + // Okay, we found the prefix, yay. Remember the rest of the line, but // ignore leading and trailing whitespace. Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); - + // Scan ahead to the end of line. size_t EOL = Buffer.find_first_of("\n\r"); @@ -539,10 +559,10 @@ static bool ReadCheckFile(SourceMgr &SM, Pattern P; if (P.ParsePattern(Buffer.substr(0, EOL), SM)) return true; - + Buffer = Buffer.substr(EOL); - + // Verify that CHECK-NEXT lines have at least one CHECK line before them. if (IsCheckNext && CheckStrings.empty()) { SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), @@ -550,34 +570,36 @@ static bool ReadCheckFile(SourceMgr &SM, CheckPrefix+ ": line", "error"); return true; } - + // Handle CHECK-NOT. if (IsCheckNot) { NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()), P)); continue; } - - + + // Okay, add the string we captured to the output vector and move on. CheckStrings.push_back(CheckString(P, PatternLoc, IsCheckNext)); std::swap(NotMatches, CheckStrings.back().NotStrings); } - + + // Add an EOF pattern for any trailing CHECK-NOTs. + if (!NotMatches.empty()) { + CheckStrings.push_back(CheckString(Pattern(true), + SMLoc::getFromPointer(Buffer.data()), + false)); + std::swap(NotMatches, CheckStrings.back().NotStrings); + } + if (CheckStrings.empty()) { errs() << "error: no check strings found with prefix '" << CheckPrefix << ":'\n"; return true; } - - if (!NotMatches.empty()) { - errs() << "error: '" << CheckPrefix - << "-NOT:' not supported after last check line.\n"; - return true; - } - + return false; } @@ -587,11 +609,11 @@ static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, // Otherwise, we have an error, emit an error message. SM.PrintMessage(CheckStr.Loc, "expected string not found in input", "error"); - + // Print the "scanning from here" line. If the current position is at the // end of a line, advance to the start of the next line. Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); - + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here", "note"); @@ -607,9 +629,9 @@ static unsigned CountNumNewlinesBetween(StringRef Range) { // Scan for newline. Range = Range.substr(Range.find_first_of("\n\r")); if (Range.empty()) return NumNewLines; - + ++NumNewLines; - + // Handle \n\r and \r\n as a single newline. if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && @@ -625,48 +647,54 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv); SourceMgr SM; - + // Read the expected strings from the check file. std::vector CheckStrings; if (ReadCheckFile(SM, CheckStrings)) return 2; // Open the file to check and add it to SourceMgr. - std::string ErrorStr; - MemoryBuffer *F = - MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr); - if (F == 0) { - errs() << "Could not open input file '" << InputFilename << "': " - << ErrorStr << '\n'; + OwningPtr File; + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { + errs() << "Could not open input file '" << InputFilename << "': " + << ec.message() << '\n'; return true; } + MemoryBuffer *F = File.take(); + + if (F->getBufferSize() == 0) { + errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; + return 1; + } // Remove duplicate spaces in the input file if requested. if (!NoCanonicalizeWhiteSpace) F = CanonicalizeInputFile(F); - + SM.AddNewSourceBuffer(F, SMLoc()); - + /// VariableTable - This holds all the current filecheck variables. StringMap VariableTable; - + // Check that we have all of the expected strings, in order, in the input // file. StringRef Buffer = F->getBuffer(); - + const char *LastMatch = Buffer.data(); - + for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) { const CheckString &CheckStr = CheckStrings[StrNo]; - + StringRef SearchFrom = Buffer; - + // Find StrNo in the file. size_t MatchLen = 0; - Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable)); - + size_t MatchPos = CheckStr.Pat.Match(Buffer, MatchLen, VariableTable); + Buffer = Buffer.substr(MatchPos); + // If we didn't find a match, reject the input. - if (Buffer.empty()) { + if (MatchPos == StringRef::npos) { PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable); return 1; } @@ -691,7 +719,7 @@ int main(int argc, char **argv) { "previous match was here", "note"); return 1; } - + if (NumNewLines != 1) { SM.PrintMessage(CheckStr.Loc, CheckPrefix+ @@ -704,7 +732,7 @@ int main(int argc, char **argv) { return 1; } } - + // If this match had "not strings", verify that they don't exist in the // skipped region. for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); @@ -714,20 +742,20 @@ int main(int argc, char **argv) { MatchLen, VariableTable); if (Pos == StringRef::npos) continue; - + SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), CheckPrefix+"-NOT: string occurred!", "error"); SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, CheckPrefix+"-NOT: pattern specified here", "note"); return 1; } - + // Otherwise, everything is good. Step over the matched text and remember // the position after the match as the end of the last match. Buffer = Buffer.substr(MatchLen); LastMatch = Buffer.data(); } - + return 0; }