NoCanonicalizeWhiteSpace("strict-whitespace",
cl::desc("Do not treat all horizontal whitespace as equivalent"));
+//===----------------------------------------------------------------------===//
+// Pattern Handling Code.
+//===----------------------------------------------------------------------===//
-/// FindStringInBuffer - This is basically just a strstr wrapper that differs in
-/// two ways: first it handles 'nul' characters in memory buffers, second, it
-/// returns the end of the memory buffer on match failure.
-static const char *FindStringInBuffer(const char *Str, const char *CurPtr,
- const MemoryBuffer &MB) {
- // Check to see if we have a match. If so, just return it.
- if (const char *Res = strstr(CurPtr, Str))
- return Res;
-
- // If not, check to make sure we didn't just find an embedded nul in the
- // memory buffer.
- const char *Ptr = CurPtr + strlen(CurPtr);
-
- // If we really reached the end of the file, return it.
- if (Ptr == MB.getBufferEnd())
- return Ptr;
-
- // Otherwise, just skip this section of the file, including the nul.
- return FindStringInBuffer(Str, Ptr+1, MB);
+/// Pattern - A single check pattern.  In this version a pattern is a plain
+/// literal string that is located with a substring search.
+class Pattern {
+  /// Str - The literal text to look for.  This is a StringRef, so it refers
+  /// to the text handed to ParsePattern rather than owning a copy
+  /// (presumably that buffer must outlive the Pattern - confirm with callers).
+  StringRef Str;
+
+public:
+  Pattern() {}
+
+  /// ParsePattern - Initialize this pattern from the text of a check line.
+  /// Returns true if an error was reported, false on success.
+  bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
+
+  /// Match - Search Buffer for the pattern string.  The return value is the
+  /// offset of the first occurrence, or whatever Buffer.find() reports when
+  /// there is none (npos).  MatchLen is always set to the length of the
+  /// pattern string, whether or not a match was found.
+  size_t Match(StringRef Buffer, size_t &MatchLen) const {
+    const size_t Pos = Buffer.find(Str);
+    MatchLen = Str.size();
+    return Pos;
+  }
+};
+
+/// ParsePattern - Set up this pattern from PatternStr, the text that follows
+/// a check directive.  Trailing spaces and tabs are not part of the pattern.
+/// Returns true (after printing a diagnostic through SM) if nothing is left
+/// once the trailing whitespace is removed, and false on success.
+bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
+  // Walk backwards over trailing horizontal whitespace to find the length of
+  // the text we actually want to keep.
+  size_t TrimmedLen = PatternStr.size();
+  while (TrimmedLen != 0 &&
+         (PatternStr[TrimmedLen-1] == ' ' || PatternStr[TrimmedLen-1] == '\t'))
+    --TrimmedLen;
+  PatternStr = PatternStr.substr(0, TrimmedLen);
+
+  // The common case: there is something to match, so remember it verbatim.
+  if (!PatternStr.empty()) {
+    Str = PatternStr;
+    return false;
+  }
+
+  // The line held nothing but whitespace; report it at the pattern position.
+  SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+                  "found empty check string with prefix '"+CheckPrefix+":'",
+                  "error");
+  return true;
}
+
+//===----------------------------------------------------------------------===//
+// Check Strings.
+//===----------------------------------------------------------------------===//
+
+/// CheckString - This is one check directive that was read from the check
+/// file, together with the "not" patterns that must not appear before it.
+struct CheckString {
+ /// Pat - The pattern to match.
+ Pattern Pat;
+
+ /// Loc - The location in the match file that the check string was specified.
+ /// Used as the position for diagnostics about this check.
+ SMLoc Loc;
+
+ /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
+ /// to a CHECK: directive).
+ bool IsCheckNext;
+
+ /// NotStrings - These are all of the strings that are disallowed from
+ /// occurring between this match string and the previous one (or start of
+ /// file). Each entry pairs the location of the directive with its pattern.
+ /// The constructor leaves this empty; it is filled in after construction.
+ std::vector<std::pair<SMLoc, Pattern> > NotStrings;
+
+ /// Build a check from its pattern, source location and CHECK-NEXT flag.
+ CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
+ : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
+};
+
+/// CanonicalizeInputFile - Build a copy of MB in which every run of spaces
+/// and tabs is replaced by exactly one space; all other characters (newlines
+/// included) are copied unchanged.  MB is deleted and the new buffer, which
+/// carries MB's buffer identifier, is returned.
+static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
+  SmallVector<char, 16> Canonical;
+  Canonical.reserve(MB->getBufferSize());
+
+  const char *Cur = MB->getBufferStart();
+  const char *const Stop = MB->getBufferEnd();
+  while (Cur != Stop) {
+    const char Ch = *Cur++;
+
+    // Anything that is not horizontal whitespace is copied through as-is.
+    if (Ch != ' ' && Ch != '\t') {
+      Canonical.push_back(Ch);
+      continue;
+    }
+
+    // A whitespace run collapses to one space: emit it once, then consume
+    // the remainder of the run.
+    Canonical.push_back(' ');
+    while (Cur != Stop && (*Cur == ' ' || *Cur == '\t'))
+      ++Cur;
+  }
+
+  // Copy the canonical text into a fresh buffer before releasing the old one,
+  // since the identifier is read from MB.
+  MemoryBuffer *Result =
+    MemoryBuffer::getMemBufferCopy(Canonical.data(),
+                                   Canonical.data() + Canonical.size(),
+                                   MB->getBufferIdentifier());
+
+  delete MB;
+  return Result;
+}
+
+
/// ReadCheckFile - Read the check file, which specifies the sequence of
/// expected strings. The strings are added to the CheckStrings vector.
+///
+/// Each "<prefix>:" and "<prefix>-NEXT:" line appends one CheckString.
+/// "<prefix>-NOT:" lines are collected and handed to the next CheckString as
+/// its NotStrings. Returns true after printing a diagnostic on error, and
+/// false on success.
static bool ReadCheckFile(SourceMgr &SM,
- std::vector<std::pair<std::string, SMLoc> >
- &CheckStrings) {
+ std::vector<CheckString> &CheckStrings) {
// Open the check file, and tell SourceMgr about it.
std::string ErrorStr;
MemoryBuffer *F =
<< ErrorStr << '\n';
return true;
}
+
+ // If we want to canonicalize whitespace, strip excess whitespace from the
+ // buffer containing the CHECK lines.
+ if (!NoCanonicalizeWhiteSpace)
+ F = CanonicalizeInputFile(F);
+
SM.AddNewSourceBuffer(F, SMLoc());
- // Find all instances of CheckPrefix followed by : in the file. The
- // MemoryBuffer is guaranteed to be nul terminated, but may have nul's
- // embedded into it. We don't support check strings with embedded nuls.
- std::string Prefix = CheckPrefix + ":";
- const char *CurPtr = F->getBufferStart(), *BufferEnd = F->getBufferEnd();
+ // Find all instances of CheckPrefix followed by : in the file.
+ StringRef Buffer = F->getBuffer();
+ // CHECK-NOT patterns seen since the last positive check; they are attached
+ // to the next CheckString that gets created.
+ std::vector<std::pair<SMLoc, Pattern> > NotMatches;
+
while (1) {
// See if Prefix occurs in the memory buffer.
- const char *Ptr = FindStringInBuffer(Prefix.c_str(), CurPtr, *F);
+ Buffer = Buffer.substr(Buffer.find(CheckPrefix));
// If we didn't find a match, we're done.
- if (Ptr == BufferEnd)
+ if (Buffer.empty())
break;
+ const char *CheckPrefixStart = Buffer.data();
+
+ // When we find a check prefix, keep track of whether we find CHECK: or
+ // CHECK-NEXT:
+ bool IsCheckNext = false, IsCheckNot = false;
+
+ // Verify that the : is present after the prefix. The length test comes
+ // first so that a prefix sitting at the very end of the buffer is not
+ // indexed past its last character.
+ if (Buffer.size() > CheckPrefix.size() &&
+ Buffer[CheckPrefix.size()] == ':') {
+ Buffer = Buffer.substr(CheckPrefix.size()+1);
+ } else if (Buffer.size() > CheckPrefix.size()+6 &&
+ memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
+ Buffer = Buffer.substr(CheckPrefix.size()+7);
+ IsCheckNext = true;
+ } else if (Buffer.size() > CheckPrefix.size()+5 &&
+ memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
+ Buffer = Buffer.substr(CheckPrefix.size()+6);
+ IsCheckNot = true;
+ } else {
+ // The prefix text is not followed by a directive suffix; step one
+ // character forward and keep searching.
+ Buffer = Buffer.substr(1);
+ continue;
+ }
+
// Okay, we found the prefix, yay. Remember the rest of the line, but
// ignore leading and trailing whitespace.
- Ptr += Prefix.size();
- while (*Ptr == ' ' || *Ptr == '\t')
- ++Ptr;
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
+
+ // Remember where the pattern text starts. Buffer is advanced to the end of
+ // the line below, so the location has to be captured here for diagnostics
+ // to point at the pattern itself.
+ SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
+
// Scan ahead to the end of line.
- CurPtr = Ptr;
- while (CurPtr != BufferEnd && *CurPtr != '\n' && *CurPtr != '\r')
- ++CurPtr;
-
- // Ignore trailing whitespace.
- while (CurPtr[-1] == ' ' || CurPtr[-1] == '\t')
- --CurPtr;
-
- // Check that there is something on the line.
- if (Ptr >= CurPtr) {
- SM.PrintMessage(SMLoc::getFromPointer(CurPtr),
- "found empty check string with prefix '"+Prefix+"'",
- "error");
+ size_t EOL = Buffer.find_first_of("\n\r");
+
+ // Parse the pattern.
+ Pattern P;
+ if (P.ParsePattern(Buffer.substr(0, EOL), SM))
+ return true;
+
+ Buffer = Buffer.substr(EOL);
+
+
+ // Verify that CHECK-NEXT lines have at least one CHECK line before them.
+ if (IsCheckNext && CheckStrings.empty()) {
+ SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
+ "found '"+CheckPrefix+"-NEXT:' without previous '"+
+ CheckPrefix+ ":' line", "error");
return true;
}
+ // Handle CHECK-NOT.
+ if (IsCheckNot) {
+ NotMatches.push_back(std::make_pair(PatternLoc, P));
+ continue;
+ }
+
+
// Okay, add the string we captured to the output vector and move on.
- CheckStrings.push_back(std::make_pair(std::string(Ptr, CurPtr),
- SMLoc::getFromPointer(Ptr)));
+ CheckStrings.push_back(CheckString(P, PatternLoc, IsCheckNext));
+ // Hand the pending CHECK-NOT patterns to the new check; the swap also
+ // leaves NotMatches empty for the next round.
+ std::swap(NotMatches, CheckStrings.back().NotStrings);
}
if (CheckStrings.empty()) {
- errs() << "error: no check strings found with prefix '" << Prefix << "'\n";
+ errs() << "error: no check strings found with prefix '" << CheckPrefix
+ << ":'\n";
+ return true;
+ }
+
+ if (!NotMatches.empty()) {
+ errs() << "error: '" << CheckPrefix
+ << "-NOT:' not supported after last check line.\n";
return true;
}
return false;
}
-// CanonicalizeCheckStrings - Replace all sequences of horizontal whitespace in
-// the check strings with a single space.
-static void CanonicalizeCheckStrings(std::vector<std::pair<std::string, SMLoc> >
- &CheckStrings) {
- for (unsigned i = 0, e = CheckStrings.size(); i != e; ++i) {
- std::string &Str = CheckStrings[i].first;
-
- for (unsigned C = 0; C != Str.size(); ++C) {
- // If C is not a horizontal whitespace, skip it.
- if (Str[C] != ' ' && Str[C] != '\t')
- continue;
-
- // Replace the character with space, then remove any other space
- // characters after it.
- Str[C] = ' ';
-
- while (C+1 != Str.size() &&
- (Str[C+1] == ' ' || Str[C+1] == '\t'))
- Str.erase(Str.begin()+C+1);
- }
- }
+/// PrintCheckFailed - Report that CheckStr could not be found.  Buffer is the
+/// text the search started from; it is used only to place the "scanning from
+/// here" note.
+static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
+                             StringRef Buffer) {
+  // The error itself is reported at the location of the check directive.
+  SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
+                  "error");
+
+  // Point the note at the first character of the search region that is not
+  // a space, tab or line terminator, so it does not land on trailing
+  // whitespace of the line where the previous match ended.
+  const size_t FirstNonSpace = Buffer.find_first_not_of(" \t\n\r");
+  const StringRef ScanStart = Buffer.substr(FirstNonSpace);
+
+  SM.PrintMessage(SMLoc::getFromPointer(ScanStart.data()), "scanning from here",
+                  "note");
}
-/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
-/// memory buffer, free it, and return a new one.
-static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
- SmallVector<char, 16> NewFile;
- NewFile.reserve(MB->getBufferSize());
-
- for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
- Ptr != End; ++Ptr) {
- // If C is not a horizontal whitespace, skip it.
- if (*Ptr != ' ' && *Ptr != '\t') {
- NewFile.push_back(*Ptr);
- continue;
- }
+/// CountNumNewlinesBetween - Count the number of newlines in the specified
+/// range. A lone '\n' or '\r' counts as one newline, and so does a two
+/// character "\r\n" or "\n\r" pair. Two identical characters in a row (for
+/// example "\n\n") count as two newlines.
+static unsigned CountNumNewlinesBetween(StringRef Range) {
+ unsigned NumNewLines = 0;
+ while (1) {
+ // Scan for newline. When none is left the range is expected to become
+ // empty, which is the loop's only exit.
+ Range = Range.substr(Range.find_first_of("\n\r"));
+ if (Range.empty()) return NumNewLines;
- // Otherwise, add one space and advance over neighboring space.
- NewFile.push_back(' ');
- while (Ptr+1 != End &&
- (Ptr[1] == ' ' || Ptr[1] == '\t'))
- ++Ptr;
+ ++NumNewLines;
+
+ // Handle \n\r and \r\n as a single newline: if the next character is the
+ // other line terminator, skip it as well.
+ if (Range.size() > 1 &&
+ (Range[1] == '\n' || Range[1] == '\r') &&
+ (Range[0] != Range[1]))
+ Range = Range.substr(1);
+ // Step over the terminator that was just counted.
+ Range = Range.substr(1);
}
-
- // Free the old buffer and return a new one.
- MemoryBuffer *MB2 =
- MemoryBuffer::getMemBufferCopy(NewFile.data(),
- NewFile.data() + NewFile.size(),
- MB->getBufferIdentifier());
-
- delete MB;
- return MB2;
}
-
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
SourceMgr SM;
// Read the expected strings from the check file.
- std::vector<std::pair<std::string, SMLoc> > CheckStrings;
+ std::vector<CheckString> CheckStrings;
if (ReadCheckFile(SM, CheckStrings))
return 2;
- // Remove duplicate spaces in the check strings if requested.
- if (!NoCanonicalizeWhiteSpace)
- CanonicalizeCheckStrings(CheckStrings);
-
// Open the file to check and add it to SourceMgr.
std::string ErrorStr;
MemoryBuffer *F =
// Check that we have all of the expected strings, in order, in the input
// file.
- const char *CurPtr = F->getBufferStart(), *BufferEnd = F->getBufferEnd();
+ StringRef Buffer = F->getBuffer();
+
+ const char *LastMatch = Buffer.data();
for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
- const std::pair<std::string, SMLoc> &CheckStr = CheckStrings[StrNo];
+ const CheckString &CheckStr = CheckStrings[StrNo];
+
+ StringRef SearchFrom = Buffer;
// Find StrNo in the file.
- const char *Ptr = FindStringInBuffer(CheckStr.first.c_str(), CurPtr, *F);
+ size_t MatchLen = 0;
+ Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen));
- // If we found a match, we're done, move on.
- if (Ptr != BufferEnd) {
- CurPtr = Ptr + CheckStr.first.size();
- continue;
+ // If we didn't find a match, reject the input.
+ if (Buffer.empty()) {
+ PrintCheckFailed(SM, CheckStr, SearchFrom);
+ return 1;
+ }
+
+ StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
+
+ // If this check is a "CHECK-NEXT", verify that the previous match was on
+ // the previous line (i.e. that there is one newline between them).
+ if (CheckStr.IsCheckNext) {
+ // Count the number of newlines between the previous match and this one.
+ assert(LastMatch != F->getBufferStart() &&
+ "CHECK-NEXT can't be the first check in a file");
+
+ unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
+ if (NumNewLines == 0) {
+ SM.PrintMessage(CheckStr.Loc,
+ CheckPrefix+"-NEXT: is on the same line as previous match",
+ "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ "'next' match was here", "note");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
+ "previous match was here", "note");
+ return 1;
+ }
+
+ if (NumNewLines != 1) {
+ SM.PrintMessage(CheckStr.Loc,
+ CheckPrefix+
+ "-NEXT: is not on the line after the previous match",
+ "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ "'next' match was here", "note");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
+ "previous match was here", "note");
+ return 1;
+ }
}
- // Otherwise, we have an error, emit an error message.
- SM.PrintMessage(CheckStr.second, "expected string not found in input",
- "error");
-
- // Print the "scanning from here" line. If the current position is at the
- // end of a line, advance to the start of the next line.
- const char *Scan = CurPtr;
- while (Scan != BufferEnd &&
- (*Scan == ' ' || *Scan == '\t'))
- ++Scan;
- if (*Scan == '\n' || *Scan == '\r')
- CurPtr = Scan+1;
-
+ // If this match had "not strings", verify that they don't exist in the
+ // skipped region.
+ for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) {
+ size_t MatchLen = 0;
+ size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen);
+ if (Pos == StringRef::npos) continue;
+
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
+ CheckPrefix+"-NOT: string occurred!", "error");
+ SM.PrintMessage(CheckStr.NotStrings[i].first,
+ CheckPrefix+"-NOT: pattern specified here", "note");
+ return 1;
+ }
- SM.PrintMessage(SMLoc::getFromPointer(CurPtr), "scanning from here",
- "note");
- return 1;
+
+ // Otherwise, everything is good. Step over the matched text and remember
+ // the position after the match as the end of the last match.
+ Buffer = Buffer.substr(MatchLen);
+ LastMatch = Buffer.data();
}
return 0;