1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
// Required positional argument naming the file that holds the check
// directives.
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
// -input-file: the file to be checked; initialized to "-".
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
// -check-prefix: list-valued option holding the directive prefixes that are
// searched for in the check file.
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
// Each pattern given here is parsed by ReadCheckFile as a CHECK-NOT pattern.
53 static cl::list<std::string> ImplicitCheckNot(
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
// Iterator type used to walk CheckPrefixes.
60 typedef cl::list<std::string>::const_iterator prefix_iterator;
62 //===----------------------------------------------------------------------===//
63 // Pattern Handling Code.
64 //===----------------------------------------------------------------------===//
75 /// MatchEOF - When set, this pattern only matches the end of file. This is
76 /// used for trailing CHECK-NOTs.
84 Check::CheckType CheckTy;
86 /// FixedStr - If non-empty, this pattern is a fixed string match with the
87 /// specified fixed string.
90 /// RegEx - If non-empty, this is a regex pattern.
93 /// \brief Contains the number of the line this pattern is on.
96 /// VariableUses - Entries in this vector map to uses of a variable in the
97 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
98 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
99 /// value of bar at offset 3.
100 std::vector<std::pair<StringRef, unsigned> > VariableUses;
102 /// VariableDefs - Maps definitions of variables to their parenthesized
104 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
105 std::map<StringRef, unsigned> VariableDefs;
109 Pattern(Check::CheckType Ty)
112 /// getLoc - Return the source location recorded for this pattern
/// (PatternLoc, which ParsePattern sets to the start of the pattern text).
113 SMLoc getLoc() const { return PatternLoc; }
115 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
116 /// which prefix is being matched, SM provides the SourceMgr used for error
117 /// reports, and LineNumber is the line number in the input file from which
118 /// the pattern string was read. Returns true in case of an error, false
120 bool ParsePattern(StringRef PatternStr,
123 unsigned LineNumber);
125 /// Match - Match the pattern string against the input buffer Buffer. This
126 /// returns the position that is matched or npos if there is no match. If
127 /// there is a match, the size of the matched string is returned in MatchLen.
129 /// The VariableTable StringMap provides the current values of filecheck
130 /// variables and is updated if this match defines new values.
131 size_t Match(StringRef Buffer, size_t &MatchLen,
132 StringMap<StringRef> &VariableTable) const;
134 /// PrintFailureInfo - Print additional information about a failure to match
135 /// involving this pattern.
136 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
137 const StringMap<StringRef> &VariableTable) const;
/// hasVariable - Return true if this pattern uses or defines at least one
/// variable, i.e. VariableUses and VariableDefs are not both empty.
139 bool hasVariable() const { return !(VariableUses.empty() &&
140 VariableDefs.empty()); }
/// getCheckTy - Return the kind of check this pattern was created with.
142 Check::CheckType getCheckTy() const { return CheckTy; }
145 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
146 void AddBackrefToRegEx(unsigned BackrefNum);
148 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
149 /// matching this pattern at the start of \arg Buffer; a distance of zero
150 /// should correspond to a perfect match.
151 unsigned ComputeMatchDistance(StringRef Buffer,
152 const StringMap<StringRef> &VariableTable) const;
154 /// \brief Evaluates expression and stores the result to \p Value.
155 /// \return true on success. false when the expression has invalid syntax.
156 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
158 /// \brief Finds the closing sequence of a regex variable usage or
159 /// definition. Str has to point in the beginning of the definition
160 /// (right after the opening sequence).
161 /// \return offset of the closing sequence within Str, or npos if it was not
163 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
/// ParsePattern - Parse PatternStr into this Pattern. The text becomes either
/// a fixed string (FixedStr) when it contains neither "{{" nor "[[", or a
/// regex accumulated in RegExStr, with variable references recorded in
/// VariableUses and variable definitions recorded in VariableDefs. Problems
/// are reported through SM.PrintMessage as DK_Error diagnostics.
167 bool Pattern::ParsePattern(StringRef PatternStr,
170 unsigned LineNumber) {
171 this->LineNumber = LineNumber;
172 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
174 // Ignore trailing whitespace (spaces and tabs only).
175 while (!PatternStr.empty() &&
176 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
177 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
179 // Check that there is something on the line.
180 if (PatternStr.empty()) {
181 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
182 "found empty check string with prefix '" +
187 // Check to see if this is a fixed string, or if it has regex pieces.
188 if (PatternStr.size() < 2 ||
189 (PatternStr.find("{{") == StringRef::npos &&
190 PatternStr.find("[[") == StringRef::npos)) {
191 FixedStr = PatternStr;
195 // Paren value #0 is for the fully matched string. Any new parenthesized
196 // values add from there.
197 unsigned CurParen = 1;
199 // Otherwise, there is at least one regex piece. Build up the regex pattern
200 // by escaping scary characters in fixed strings, building up one big regex.
201 while (!PatternStr.empty()) {
203 if (PatternStr.startswith("{{")) {
204 // This is the start of a regex match. Scan for the }}.
205 size_t End = PatternStr.find("}}");
206 if (End == StringRef::npos) {
207 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
209 "found start of regex string with no end '}}'");
213 // Enclose {{}} patterns in parens just like [[]] even though we're not
214 // capturing the result for any purpose. This is required in case the
215 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
216 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
220 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
224 PatternStr = PatternStr.substr(End+2);
228 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
229 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
230 // second form is [[foo]] which is a reference to foo. The variable name
231 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
232 // it. This is to catch some common errors.
233 if (PatternStr.startswith("[[")) {
234 // Find the closing bracket pair ending the match. End is going to be an
235 // offset relative to the beginning of the match string.
236 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
238 if (End == StringRef::npos) {
239 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
241 "invalid named regex reference, no ]] found");
// End is relative to the text after the opening "[[", so the match text is
// substr(2, End) and End+4 skips "[[", the match text and the closing "]]".
245 StringRef MatchStr = PatternStr.substr(2, End);
246 PatternStr = PatternStr.substr(End+4);
248 // Get the regex name (e.g. "foo").
249 size_t NameEnd = MatchStr.find(':');
250 StringRef Name = MatchStr.substr(0, NameEnd);
253 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
254 "invalid name in named regex: empty name");
258 // Verify that the name/expression is well formed. FileCheck currently
259 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
260 // is relaxed; a stricter check is performed in \c EvaluateExpression.
261 bool IsExpression = false;
262 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
263 if (i == 0 && Name[i] == '@') {
264 if (NameEnd != StringRef::npos) {
265 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
267 "invalid name in named regex definition");
// '+' and '-' are tolerated only inside expressions (IsExpression).
// NOTE(review): isalnum receives a plain char here, while the isdigit call
// below casts to unsigned char first; the two should probably agree.
273 if (Name[i] != '_' && !isalnum(Name[i]) &&
274 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
275 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
276 SourceMgr::DK_Error, "invalid name in named regex");
281 // Name can't start with a digit.
282 if (isdigit(static_cast<unsigned char>(Name[0]))) {
283 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
284 "invalid name in named regex");
289 if (NameEnd == StringRef::npos) {
290 // Handle variables that were defined earlier on the same line by
291 // emitting a backreference.
292 if (VariableDefs.find(Name) != VariableDefs.end()) {
293 unsigned VarParenNum = VariableDefs[Name];
// AddBackrefToRegEx emits a single digit after the backslash, so only capture
// groups 1..9 can be referenced.
294 if (VarParenNum < 1 || VarParenNum > 9) {
295 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
297 "Can't back-reference more than 9 variables");
300 AddBackrefToRegEx(VarParenNum);
// Record the use together with the current end of RegExStr; Match() later
// splices the variable's value into the regex at that offset.
302 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
307 // Handle [[foo:.*]].
308 VariableDefs[Name] = CurParen;
312 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
318 // Handle fixed string matches.
319 // Find the end, which is the start of the next regex.
320 size_t FixedMatchEnd = PatternStr.find("{{");
// Take whichever of "{{" and "[[" comes first.
321 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
322 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
323 PatternStr = PatternStr.substr(FixedMatchEnd);
/// AddRegExToRegEx - Append the regex fragment RS to RegExStr and advance
/// CurParen by the number of matches R reports for it. An invalid fragment is
/// reported through SM as "invalid regex: <reason>".
// NOTE(review): R is presumably a Regex built from RS; its construction is not
// visible here -- confirm.
329 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
333 if (!R.isValid(Error)) {
334 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
335 "invalid regex: " + Error);
339 RegExStr += RS.str();
340 CurParen += R.getNumMatches();
/// AddBackrefToRegEx - Build the backreference text for capture group
/// BackrefNum: a backslash followed by the decimal digit of BackrefNum.
344 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
345 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
// '0' + BackrefNum produces exactly one digit character, which is why the
// assert above restricts BackrefNum to 1..9.
346 std::string Backref = std::string("\\") +
347 std::string(1, '0' + BackrefNum);
/// EvaluateExpression - Evaluate an "@LINE"-based expression. The text after
/// "@LINE" is parsed as a base-10 integer Offset, and Value is set to the
/// decimal rendering of LineNumber + Offset.
351 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
352 // The only supported expression is @LINE([\+-]\d+)?
353 if (!Expr.startswith("@LINE"))
// Drop the "@LINE" prefix, leaving only the optional offset part.
355 Expr = Expr.substr(StringRef("@LINE").size());
359 Expr = Expr.substr(1);
360 else if (Expr[0] != '-')
362 if (Expr.getAsInteger(10, Offset))
365 Value = llvm::itostr(LineNumber + Offset);
369 /// Match - Match the pattern string against the input buffer Buffer. This
370 /// returns the position that is matched or npos if there is no match. If
371 /// there is a match, the size of the matched string is returned in MatchLen.
372 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
373 StringMap<StringRef> &VariableTable) const {
374 // If this is the EOF pattern, match it immediately at the end of Buffer.
375 if (CheckTy == Check::CheckEOF) {
377 return Buffer.size();
380 // If this is a fixed string pattern, just match it now.
381 if (!FixedStr.empty()) {
382 MatchLen = FixedStr.size();
383 return Buffer.find(FixedStr);
388 // If there are variable uses, we need to create a temporary string with the
390 StringRef RegExToMatch = RegExStr;
392 if (!VariableUses.empty()) {
// Running total of characters spliced in so far; the offsets stored in
// VariableUses were recorded against the unmodified RegExStr.
395 unsigned InsertOffset = 0;
396 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
// A leading '@' marks an expression (e.g. @LINE) rather than a variable name.
399 if (VariableUses[i].first[0] == '@') {
400 if (!EvaluateExpression(VariableUses[i].first, Value))
401 return StringRef::npos;
403 StringMap<StringRef>::iterator it =
404 VariableTable.find(VariableUses[i].first);
405 // If the variable is undefined, report "no match" (npos).
406 if (it == VariableTable.end())
407 return StringRef::npos;
409 // Look up the value and escape it so that we can put it into the regex.
410 Value += Regex::escape(it->second);
413 // Plop it into the regex at the adjusted offset.
414 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
415 Value.begin(), Value.end());
416 InsertOffset += Value.size();
419 // Match the newly constructed regex.
420 RegExToMatch = TmpStr;
424 SmallVector<StringRef, 4> MatchInfo;
425 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
426 return StringRef::npos;
428 // Successful regex match. MatchInfo[0] is used as the full match.
429 assert(!MatchInfo.empty() && "Didn't get any match");
430 StringRef FullMatch = MatchInfo[0];
432 // If this defines any variables, remember their values. VariableDefs maps
// each variable name to the index of its entry in MatchInfo.
433 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
434 E = VariableDefs.end();
436 assert(I->second < MatchInfo.size() && "Internal paren error");
437 VariableTable[I->first] = MatchInfo[I->second];
440 MatchLen = FullMatch.size();
// Convert the address of the match into an offset from the start of Buffer.
441 return FullMatch.data()-Buffer.data();
444 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
445 const StringMap<StringRef> &VariableTable) const {
446 // Estimate the distance as the edit distance between the pattern text and the
447 // start of Buffer. For regular expressions, we just compare against the regex
// source itself and hope for the best.
449 // FIXME: One easy improvement here is to have the regex lib generate a single
450 // example regular expression which matches, and use that as the example
452 StringRef ExampleString(FixedStr);
453 if (ExampleString.empty())
454 ExampleString = RegExStr;
456 // Only compare up to the first line in the buffer, or the string size.
457 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
458 BufferPrefix = BufferPrefix.split('\n').first;
459 return BufferPrefix.edit_distance(ExampleString);
/// PrintFailureInfo - After a failed match, emit notes describing each
/// variable or expression this pattern uses, then look through the start of
/// Buffer for the position that most resembles the pattern and point at it.
462 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
463 const StringMap<StringRef> &VariableTable) const{
464 // If this was a regular expression using variables, print the current
466 if (!VariableUses.empty()) {
467 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
468 SmallString<256> Msg;
469 raw_svector_ostream OS(Msg);
470 StringRef Var = VariableUses[i].first;
473 if (EvaluateExpression(Var, Value)) {
474 OS << "with expression \"";
475 OS.write_escaped(Var) << "\" equal to \"";
476 OS.write_escaped(Value) << "\"";
478 OS << "uses incorrect expression \"";
479 OS.write_escaped(Var) << "\"";
482 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
484 // Check for undefined variable references.
485 if (it == VariableTable.end()) {
486 OS << "uses undefined variable \"";
487 OS.write_escaped(Var) << "\"";
489 OS << "with variable \"";
490 OS.write_escaped(Var) << "\" equal to \"";
491 OS.write_escaped(it->second) << "\"";
495 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
500 // Attempt to find the closest/best fuzzy match. Usually an error happens
501 // because some string in the output didn't exactly match. In these cases, we
502 // would like to show the user a best guess at what "should have" matched, to
503 // save them having to actually check the input manually.
504 size_t NumLinesForward = 0;
505 size_t Best = StringRef::npos;
506 double BestQuality = 0;
508 // Use an arbitrary 4k limit on how far we will search.
509 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
510 if (Buffer[i] == '\n')
513 // Patterns have leading whitespace stripped, so skip whitespace when
514 // looking for something which looks like a pattern.
515 if (Buffer[i] == ' ' || Buffer[i] == '\t')
518 // Compute the "quality" of this match as an arbitrary combination of the
519 // match distance and the number of lines skipped to get to this match.
// Smaller values are better.
520 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
521 double Quality = Distance + (NumLinesForward / 100.);
// The first candidate is always accepted (Best is still npos); later ones
// must be strictly better.
523 if (Quality < BestQuality || Best == StringRef::npos) {
525 BestQuality = Quality;
529 // Print the "possible intended match here" line if we found something
530 // reasonable and not equal to what we showed in the "scanning from here"
532 if (Best && Best != StringRef::npos && BestQuality < 50) {
533 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
534 SourceMgr::DK_Note, "possible intended match here");
536 // FIXME: If we wanted to be really friendly we would show why the match
537 // failed, as it can be hard to spot simple one character differences.
/// FindRegexVarEnd - Scan Str for the "]]" that closes a regex variable,
/// accepting it only while BracketDepth is zero. A missing closing "]" is
/// reported through SM.
541 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
542 // Offset keeps track of the current offset within the input Str
544 // Nesting depth of [...] bracket expressions.
545 size_t BracketDepth = 0;
547 while (!Str.empty()) {
548 if (Str.startswith("]]") && BracketDepth == 0)
550 if (Str[0] == '\\') {
551 // Backslash escapes the next char within regexes, so skip them both.
562 if (BracketDepth == 0) {
563 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
565 "missing closing \"]\" for regex variable");
576 return StringRef::npos;
580 //===----------------------------------------------------------------------===//
582 //===----------------------------------------------------------------------===//
584 /// CheckString - This is a check that we found in the input file.
586 /// Pat - The pattern to match.
589 /// Prefix - Which prefix name this check matched.
592 /// Loc - The location in the match file that the check string was specified.
595 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
596 /// as opposed to a CHECK: directive.
597 Check::CheckType CheckTy;
599 /// DagNotStrings - These are all of the strings that are disallowed from
600 /// occurring between this match string and the previous one (or start of
602 std::vector<Pattern> DagNotStrings;
605 CheckString(const Pattern &P,
609 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
611 /// Check - Match check string and its "not strings" and/or "dag strings".
612 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
613 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
615 /// CheckNext - Verify there is a single line in the given buffer.
616 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
618 /// CheckNot - Verify there are no "not strings" in the given buffer.
619 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
620 const std::vector<const Pattern *> &NotStrings,
621 StringMap<StringRef> &VariableTable) const;
623 /// CheckDag - Match "dag strings" and their mixed "not strings".
624 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
625 std::vector<const Pattern *> &NotStrings,
626 StringMap<StringRef> &VariableTable) const;
629 /// Canonicalize whitespace in the input file. Line endings are replaced
630 /// with UNIX-style '\n'.
632 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
633 /// characters to a single space.
634 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
635 bool PreserveHorizontal) {
636 SmallString<128> NewFile;
// Reserve room for the whole input up front.
637 NewFile.reserve(MB->getBufferSize());
639 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
641 // Eliminate trailing dosish \r. The bound check guarantees that Ptr[1] is
// still inside the buffer.
642 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
646 // If current char is not a horizontal whitespace or if horizontal
647 // whitespace canonicalization is disabled, dump it to output as is.
648 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
649 NewFile.push_back(*Ptr);
653 // Otherwise, add one space and advance over neighboring space.
654 NewFile.push_back(' ');
655 while (Ptr+1 != End &&
656 (Ptr[1] == ' ' || Ptr[1] == '\t'))
660 // Free the old buffer and return a new one.
662 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
/// IsPartOfWord - Return true if \p c can appear inside a word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior, so convert
  // first.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
672 // Get the size of the prefix extension, i.e. the number of characters of the
// directive suffix that follows the prefix for the given check type (for
// example 6 for "-NEXT:"). Each size is sizeof of the literal minus one for
// the terminating NUL.
673 static size_t CheckTypeSize(Check::CheckType Ty) {
675 case Check::CheckNone:
678 case Check::CheckPlain:
679 return sizeof(":") - 1;
681 case Check::CheckNext:
682 return sizeof("-NEXT:") - 1;
684 case Check::CheckNot:
685 return sizeof("-NOT:") - 1;
687 case Check::CheckDAG:
688 return sizeof("-DAG:") - 1;
690 case Check::CheckLabel:
691 return sizeof("-LABEL:") - 1;
693 case Check::CheckEOF:
694 llvm_unreachable("Should not be using EOF size");
697 llvm_unreachable("Bad check type");
// Classify the directive that starts at Buffer, which is expected to begin
// with Prefix. The character right after the prefix is inspected first; the
// text one character further on is then compared against "NEXT:", "NOT:",
// "DAG:" and "LABEL:". Check::CheckNone is returned when nothing matches.
700 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
// NOTE(review): no size check precedes this index, so Buffer is presumably
// always longer than Prefix here -- confirm against the caller.
701 char NextChar = Buffer[Prefix.size()];
703 // Verify that the : is present after the prefix.
705 return Check::CheckPlain;
708 return Check::CheckNone;
// Skip the prefix and the single character that follows it.
710 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
711 if (Rest.startswith("NEXT:"))
712 return Check::CheckNext;
714 if (Rest.startswith("NOT:"))
715 return Check::CheckNot;
717 if (Rest.startswith("DAG:"))
718 return Check::CheckDAG;
720 if (Rest.startswith("LABEL:"))
721 return Check::CheckLabel;
723 return Check::CheckNone;
726 // From the given position, find the next character after the word, where a
// word is a run of characters accepted by IsPartOfWord. The scan never reads
// past Str.size().
727 static size_t SkipWord(StringRef Str, size_t Loc) {
728 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
733 // Try to find the first match in buffer for any prefix. If a valid match is
734 // found, return that prefix and set its type and location. If there are near
735 // matches (e.g. the actual prefix string is found, but is not an actual check
736 // string), but no valid match, return an empty string and set the position to
737 // resume searching from. If no partial matches are found, return an empty
738 // string and the location will be StringRef::npos. If one prefix is a substring
739 // of another, the maximal match should be found. e.g. if "A" and "AA" are
740 // prefixes then AA-CHECK: should match the second one.
741 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
742 Check::CheckType &CheckTy,
744 StringRef FirstPrefix;
745 size_t FirstLoc = StringRef::npos;
746 size_t SearchLoc = StringRef::npos;
747 Check::CheckType FirstTy = Check::CheckNone;
// Start from the "nothing found" state for both out-parameters.
749 CheckTy = Check::CheckNone;
750 CheckLoc = StringRef::npos;
752 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
754 StringRef Prefix(*I);
755 size_t PrefixLoc = Buffer.find(Prefix);
757 if (PrefixLoc == StringRef::npos)
760 // Track where we are searching for invalid prefixes that look almost right.
761 // We need to only advance to the first partial match on the next attempt
762 // since a partial match could be a substring of a later, valid prefix.
763 // Need to skip to the end of the word, otherwise we could end up
764 // matching a prefix in a substring later.
765 if (PrefixLoc < SearchLoc)
766 SearchLoc = SkipWord(Buffer, PrefixLoc);
768 // We only want to find the first match to avoid skipping some.
769 if (PrefixLoc > FirstLoc)
771 // If one matching check-prefix is a prefix of another, choose the
773 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
776 StringRef Rest = Buffer.drop_front(PrefixLoc);
777 // Make sure we have actually found the prefix, and not a word containing
778 // it. This should also prevent matching the wrong prefix when one is a
779 // substring of another.
780 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
781 FirstTy = Check::CheckNone;
783 FirstTy = FindCheckType(Rest, Prefix);
// Remember this prefix as the earliest candidate seen so far.
785 FirstLoc = PrefixLoc;
786 FirstPrefix = Prefix;
789 // If the first prefix is invalid, we should continue the search after it.
790 if (FirstTy == Check::CheckNone) {
791 CheckLoc = SearchLoc;
// Repeatedly ask FindFirstCandidateMatch for the next candidate in Buffer.
// Near matches are skipped by dropping everything up to and including
// CheckLoc from Buffer. LineNumber is advanced by the number of '\n'
// characters in the text that is passed over.
800 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
801 unsigned &LineNumber,
802 Check::CheckType &CheckTy,
804 while (!Buffer.empty()) {
805 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
806 // If we found a real match, we are done.
807 if (!Prefix.empty()) {
808 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
812 // We didn't find any near matches either, so we are also done.
813 if (CheckLoc == StringRef::npos)
816 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
818 // Advance to the last possible match we found and try again.
819 Buffer = Buffer.drop_front(CheckLoc + 1);
825 /// ReadCheckFile - Read the check file, which specifies the sequence of
826 /// expected strings. The strings are added to the CheckStrings vector.
827 /// Returns true in case of an error, false otherwise.
828 static bool ReadCheckFile(SourceMgr &SM,
829 std::vector<CheckString> &CheckStrings) {
830 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
831 MemoryBuffer::getFileOrSTDIN(CheckFilename);
832 if (std::error_code EC = FileOrErr.getError()) {
833 errs() << "Could not open check file '" << CheckFilename
834 << "': " << EC.message() << '\n';
838 // If we want to canonicalize whitespace, strip excess whitespace from the
839 // buffer containing the CHECK lines. Remove DOS style line endings.
// The unique_ptr is released here, so the raw buffer is handed over to
// CanonicalizeInputFile.
840 MemoryBuffer *F = CanonicalizeInputFile(FileOrErr.get().release(),
841 NoCanonicalizeWhiteSpace);
843 SM.AddNewSourceBuffer(F, SMLoc());
845 // Find all instances of CheckPrefix followed by : in the file.
846 StringRef Buffer = F->getBuffer();
848 std::vector<Pattern> ImplicitNegativeChecks;
849 for (const auto &PatternString : ImplicitCheckNot) {
850 // Create a buffer with fake command line content in order to display the
851 // command line option responsible for the specific implicit CHECK-NOT.
852 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
853 std::string Suffix = "'";
854 MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
855 Prefix + PatternString + Suffix, "command line");
// Slice out just the pattern text so that diagnostics point inside the fake
// command line buffer.
856 StringRef PatternInBuffer =
857 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
858 SM.AddNewSourceBuffer(CmdLine, SMLoc());
// NOTE(review): the result of ParsePattern is not checked for implicit
// patterns, so a parse error here does not stop processing.
860 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
861 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
862 "IMPLICIT-CHECK", SM, 0);
// Pending CHECK-DAG/CHECK-NOT patterns, seeded with the implicit negative
// checks.
866 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
868 // LineNumber keeps track of the line on which CheckPrefix instances are
870 unsigned LineNumber = 1;
873 Check::CheckType CheckTy;
876 // See if a prefix occurs in the memory buffer.
877 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
881 if (UsedPrefix.empty())
884 Buffer = Buffer.drop_front(PrefixLoc);
886 // Location to use for error messages.
887 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
889 // PrefixLoc is to the start of the prefix. Skip to the end.
890 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
892 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
893 // leading and trailing whitespace. (Leading whitespace is dropped here;
// trailing whitespace is dropped by ParsePattern.)
894 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
896 // Scan ahead to the end of line.
897 size_t EOL = Buffer.find_first_of("\n\r");
899 // Remember the location of the start of the pattern, for diagnostics.
900 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
902 // Parse the pattern.
904 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
907 // Verify that CHECK-LABEL lines do not define or use variables
908 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
909 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
911 "found '" + UsedPrefix + "-LABEL:'"
912 " with variable definition or use");
916 Buffer = Buffer.substr(EOL);
918 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
// NOTE(review): the message below opens a quote before the second prefix but
// never closes it.
919 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
920 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
922 "found '" + UsedPrefix + "-NEXT:' without previous '"
923 + UsedPrefix + ": line");
927 // Handle CHECK-DAG/-NOT.
928 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
929 DagNotMatches.push_back(P);
933 // Okay, add the string we captured to the output vector and move on.
934 CheckStrings.push_back(CheckString(P,
// Hand the pending DAG/NOT patterns to the new check string, then restart
// the pending list from the implicit negative checks.
938 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
939 DagNotMatches = ImplicitNegativeChecks;
942 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
943 // prefix as a filler for the error message.
944 if (!DagNotMatches.empty()) {
945 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
947 SMLoc::getFromPointer(Buffer.data()),
949 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
// Nothing was collected: report every configured prefix, using the plural
// form when more than one prefix is configured.
952 if (CheckStrings.empty()) {
953 errs() << "error: no check strings found with prefix"
954 << (CheckPrefixes.size() > 1 ? "es " : " ");
955 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
956 StringRef Prefix(CheckPrefixes[I]);
957 errs() << '\'' << Prefix << ":'";
// Report a failed match: an error at Loc, a "scanning from here" note at the
// first non-whitespace character of Buffer, and then whatever extra
// information the pattern itself prints.
969 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
970 const Pattern &Pat, StringRef Buffer,
971 StringMap<StringRef> &VariableTable) {
972 // Emit the primary error message at the location of the check.
973 SM.PrintMessage(Loc, SourceMgr::DK_Error,
974 "expected string not found in input");
976 // Print the "scanning from here" line. If the current position is at the
977 // end of a line, advance to the start of the next line.
978 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
980 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
981 "scanning from here");
983 // Allow the pattern to print additional information if desired.
984 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
// Convenience overload: report the failure using the location and pattern
// stored in CheckStr.
987 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
989 StringMap<StringRef> &VariableTable) {
990 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
993 /// CountNumNewlinesBetween - Count the number of newlines in the specified
995 static unsigned CountNumNewlinesBetween(StringRef Range,
996 const char *&FirstNewLine) {
997 unsigned NumNewLines = 0;
// Jump to the next '\n' or '\r'; if none is left, the count is final.
1000 Range = Range.substr(Range.find_first_of("\n\r"));
1001 if (Range.empty()) return NumNewLines;
1005 // Handle \n\r and \r\n as a single newline.
1006 if (Range.size() > 1 &&
1007 (Range[1] == '\n' || Range[1] == '\r') &&
1008 (Range[0] != Range[1]))
1009 Range = Range.substr(1);
1010 Range = Range.substr(1);
// Range has already been moved past the newline here, so FirstNewLine is set
// to the text that follows the first newline.
1012 if (NumNewLines == 1)
1013 FirstNewLine = Range.begin();
/// Check - Match this check string against Buffer. Outside label-scan mode the
/// CHECK-DAG patterns are matched first and the CHECK-NEXT / CHECK-NOT
/// constraints are verified on the text skipped before the match. Every
/// failure path visible here returns StringRef::npos.
1017 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1018 bool IsLabelScanMode, size_t &MatchLen,
1019 StringMap<StringRef> &VariableTable) const {
1021 std::vector<const Pattern *> NotStrings;
1023 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1024 // bounds; we have not processed variable definitions within the bounded block
1025 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1026 // over the block again (including the last CHECK-LABEL) in normal mode.
1027 if (!IsLabelScanMode) {
1028 // Match "dag strings" (with mixed "not strings" if any).
1029 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1030 if (LastPos == StringRef::npos)
1031 return StringRef::npos;
1034 // Match itself from the last position after matching CHECK-DAG.
1035 StringRef MatchBuffer = Buffer.substr(LastPos);
1036 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1037 if (MatchPos == StringRef::npos) {
1038 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1039 return StringRef::npos;
// Pat.Match returned an offset relative to MatchBuffer; rebase it onto Buffer.
1041 MatchPos += LastPos;
1043 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1045 if (!IsLabelScanMode) {
// NOTE(review): MatchPos is an absolute position in Buffer at this point, yet
// it is passed as the second argument of substr, which the other calls in
// this file treat as a length. When LastPos is non-zero the region would then
// extend past the match; MatchPos - LastPos may have been intended -- confirm.
1046 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1048 // If this check is a "CHECK-NEXT", verify that the previous match was on
1049 // the previous line (i.e. that there is one newline between them).
1050 if (CheckNext(SM, SkippedRegion))
1051 return StringRef::npos;
1053 // If this match had "not strings", verify that they don't exist in the
1055 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1056 return StringRef::npos;
1062 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1063 if (CheckTy != Check::CheckNext)
1066 // Count the number of newlines between the previous match and this one.
1067 assert(Buffer.data() !=
1069 SM.FindBufferContainingLoc(
1070 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1071 "CHECK-NEXT can't be the first check in a file");
1073 const char *FirstNewLine = nullptr;
1074 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1076 if (NumNewLines == 0) {
1077 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1078 "-NEXT: is on the same line as previous match");
1079 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1080 SourceMgr::DK_Note, "'next' match was here");
1081 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1082 "previous match ended here");
1086 if (NumNewLines != 1) {
1087 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1088 "-NEXT: is not on the line after the previous match");
1089 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1090 SourceMgr::DK_Note, "'next' match was here");
1091 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1092 "previous match ended here");
1093 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1094 "non-matching line after previous match is here");
1101 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1102 const std::vector<const Pattern *> &NotStrings,
1103 StringMap<StringRef> &VariableTable) const {
1104 for (unsigned ChunkNo = 0, e = NotStrings.size();
1105 ChunkNo != e; ++ChunkNo) {
1106 const Pattern *Pat = NotStrings[ChunkNo];
1107 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1109 size_t MatchLen = 0;
1110 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1112 if (Pos == StringRef::npos) continue;
1114 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1115 SourceMgr::DK_Error,
1116 Prefix + "-NOT: string occurred!");
1117 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1118 Prefix + "-NOT: pattern specified here");
1125 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1126 std::vector<const Pattern *> &NotStrings,
1127 StringMap<StringRef> &VariableTable) const {
1128 if (DagNotStrings.empty())
1132 size_t StartPos = LastPos;
1134 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1135 ChunkNo != e; ++ChunkNo) {
1136 const Pattern &Pat = DagNotStrings[ChunkNo];
1138 assert((Pat.getCheckTy() == Check::CheckDAG ||
1139 Pat.getCheckTy() == Check::CheckNot) &&
1140 "Invalid CHECK-DAG or CHECK-NOT!");
1142 if (Pat.getCheckTy() == Check::CheckNot) {
1143 NotStrings.push_back(&Pat);
1147 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1149 size_t MatchLen = 0, MatchPos;
1151 // CHECK-DAG always matches from the start.
1152 StringRef MatchBuffer = Buffer.substr(StartPos);
1153 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1154 // With a group of CHECK-DAGs, a single mismatching means the match on
1155 // that group of CHECK-DAGs fails immediately.
1156 if (MatchPos == StringRef::npos) {
1157 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1158 return StringRef::npos;
1160 // Re-calc it as the offset relative to the start of the original string.
1161 MatchPos += StartPos;
1163 if (!NotStrings.empty()) {
1164 if (MatchPos < LastPos) {
1166 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1167 SourceMgr::DK_Error,
1168 Prefix + "-DAG: found a match of CHECK-DAG"
1169 " reordering across a CHECK-NOT");
1170 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1172 Prefix + "-DAG: the farthest match of CHECK-DAG"
1174 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1175 Prefix + "-NOT: the crossed pattern specified"
1177 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1178 Prefix + "-DAG: the reordered pattern specified"
1180 return StringRef::npos;
1182 // All subsequent CHECK-DAGs should be matched from the farthest
1183 // position of all precedent CHECK-DAGs (including this one.)
1185 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1186 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1188 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1189 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1190 return StringRef::npos;
1191 // Clear "not strings".
1195 // Update the last position with CHECK-DAG matches.
1196 LastPos = std::max(MatchPos + MatchLen, LastPos);
1202 // A check prefix must contain only alphanumeric, hyphens and underscores.
1203 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1204 Regex Validator("^[a-zA-Z0-9_-]*$");
1205 return Validator.match(CheckPrefix);
1208 static bool ValidateCheckPrefixes() {
1209 StringSet<> PrefixSet;
1211 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1213 StringRef Prefix(*I);
1215 if (!PrefixSet.insert(Prefix))
1218 if (!ValidateCheckPrefix(Prefix))
1225 // I don't think there's a way to specify an initial value for cl::list,
1226 // so if nothing was specified, add the default
1227 static void AddCheckPrefixIfNeeded() {
1228 if (CheckPrefixes.empty())
1229 CheckPrefixes.push_back("CHECK");
1232 int main(int argc, char **argv) {
1233 sys::PrintStackTraceOnErrorSignal();
1234 PrettyStackTraceProgram X(argc, argv);
1235 cl::ParseCommandLineOptions(argc, argv);
1237 if (!ValidateCheckPrefixes()) {
1238 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1239 "start with a letter and contain only alphanumeric characters, "
1240 "hyphens and underscores\n";
1244 AddCheckPrefixIfNeeded();
1248 // Read the expected strings from the check file.
1249 std::vector<CheckString> CheckStrings;
1250 if (ReadCheckFile(SM, CheckStrings))
1253 // Open the file to check and add it to SourceMgr.
1254 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1255 MemoryBuffer::getFileOrSTDIN(InputFilename);
1256 if (std::error_code EC = FileOrErr.getError()) {
1257 errs() << "Could not open input file '" << InputFilename
1258 << "': " << EC.message() << '\n';
1261 std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get());
1263 if (File->getBufferSize() == 0) {
1264 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1268 // Remove duplicate spaces in the input file if requested.
1269 // Remove DOS style line endings.
1271 CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace);
1273 SM.AddNewSourceBuffer(F, SMLoc());
1275 /// VariableTable - This holds all the current filecheck variables.
1276 StringMap<StringRef> VariableTable;
1278 // Check that we have all of the expected strings, in order, in the input
1280 StringRef Buffer = F->getBuffer();
1282 bool hasError = false;
1284 unsigned i = 0, j = 0, e = CheckStrings.size();
1287 StringRef CheckRegion;
1289 CheckRegion = Buffer;
1291 const CheckString &CheckLabelStr = CheckStrings[j];
1292 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1297 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1298 size_t MatchLabelLen = 0;
1299 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1300 MatchLabelLen, VariableTable);
1301 if (MatchLabelPos == StringRef::npos) {
1306 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1307 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1311 for ( ; i != j; ++i) {
1312 const CheckString &CheckStr = CheckStrings[i];
1314 // Check each string within the scanned region, including a second check
1315 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1316 size_t MatchLen = 0;
1317 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1320 if (MatchPos == StringRef::npos) {
1326 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1333 return hasError ? 1 : 0;