1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
// Positional, required argument: the file that contains the check directives.
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
// -input-file: the file to check. It is initialized to "-", which the
// description documents as meaning stdin.
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
// -check-prefix: list of prefixes that introduce check directives.
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -strict-whitespace: ReadCheckFile forwards this value to
// CanonicalizeInputFile as its PreserveHorizontal argument.
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
// List of patterns that ReadCheckFile turns into implicit CHECK-NOT patterns.
53 static cl::list<std::string> ImplicitCheckNot(
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
// -allow-empty: off by default.
60 static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
// Iterator type used to walk the CheckPrefixes list.
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
81 /// MatchEOF - When set, this pattern only matches the end of file. This is
82 /// used for trailing CHECK-NOTs.
/// CheckTy - The kind of check this pattern belongs to (see getCheckTy()).
/// Match() treats Check::CheckEOF specially.
90 Check::CheckType CheckTy;
92 /// FixedStr - If non-empty, this pattern is a fixed string match with the
93 /// specified fixed string.
96 /// RegEx - If non-empty, this is a regex pattern.
99 /// \brief Contains the number of the line this pattern is on.
102 /// VariableUses - Entries in this vector map to uses of a variable in the
103 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
104 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105 /// value of bar at offset 3.
106 std::vector<std::pair<StringRef, unsigned> > VariableUses;
108 /// VariableDefs - Maps definitions of variables to their parenthesized
/// capture group numbers.
110 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111 std::map<StringRef, unsigned> VariableDefs;
/// Construct a pattern for a check of kind \p Ty.
115 Pattern(Check::CheckType Ty)
118 /// getLoc - Return the location in source code.
119 SMLoc getLoc() const { return PatternLoc; }
121 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122 /// which prefix is being matched, SM provides the SourceMgr used for error
123 /// reports, and LineNumber is the line number in the input file from which
124 /// the pattern string was read. Returns true in case of an error, false
/// otherwise.
126 bool ParsePattern(StringRef PatternStr,
129 unsigned LineNumber);
131 /// Match - Match the pattern string against the input buffer Buffer. This
132 /// returns the position that is matched or npos if there is no match. If
133 /// there is a match, the size of the matched string is returned in MatchLen.
135 /// The VariableTable StringMap provides the current values of filecheck
136 /// variables and is updated if this match defines new values.
137 size_t Match(StringRef Buffer, size_t &MatchLen,
138 StringMap<StringRef> &VariableTable) const;
140 /// PrintFailureInfo - Print additional information about a failure to match
141 /// involving this pattern.
142 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143 const StringMap<StringRef> &VariableTable) const;
/// hasVariable - Return true if this pattern uses or defines at least one
/// variable.
145 bool hasVariable() const { return !(VariableUses.empty() &&
146 VariableDefs.empty()); }
/// getCheckTy - Return the kind of check stored in CheckTy.
148 Check::CheckType getCheckTy() const { return CheckTy; }
/// AddRegExToRegEx - Append the regex \p RS to RegExStr and advance
/// \p CurParen by the number of matches the regex reports; an invalid regex
/// is diagnosed through \p SM.
151 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
/// AddBackrefToRegEx - Build the backreference "\N" for \p BackrefNum, which
/// must be in the range 1..9.
152 void AddBackrefToRegEx(unsigned BackrefNum);
154 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155 /// matching this pattern at the start of \arg Buffer; a distance of zero
156 /// should correspond to a perfect match.
157 unsigned ComputeMatchDistance(StringRef Buffer,
158 const StringMap<StringRef> &VariableTable) const;
160 /// \brief Evaluates expression and stores the result to \p Value.
161 /// \return true on success. false when the expression has invalid syntax.
162 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
164 /// \brief Finds the closing sequence of a regex variable usage or
165 /// definition. Str has to point to the beginning of the definition
166 /// (right after the opening sequence).
167 /// \return offset of the closing sequence within Str, or npos if it was not
/// found.
169 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
/// Parse PatternStr into this pattern. Trailing spaces and tabs are dropped
/// first. A string shorter than two characters, or one containing neither
/// "{{" nor "[[", is stored as FixedStr. Otherwise the text is consumed piece
/// by piece: "{{...}}" regex pieces, "[[...]]" variable definitions and uses,
/// and fixed text that is regex-escaped and appended to RegExStr.
173 bool Pattern::ParsePattern(StringRef PatternStr,
176 unsigned LineNumber) {
177 this->LineNumber = LineNumber;
178 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
180 // Ignore trailing whitespace.
181 while (!PatternStr.empty() &&
182 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
185 // Check that there is something on the line.
186 if (PatternStr.empty()) {
187 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188 "found empty check string with prefix '" +
193 // Check to see if this is a fixed string, or if it has regex pieces.
194 if (PatternStr.size() < 2 ||
195 (PatternStr.find("{{") == StringRef::npos &&
196 PatternStr.find("[[") == StringRef::npos)) {
197 FixedStr = PatternStr;
201 // Paren value #0 is for the fully matched string. Any new parenthesized
202 // values add from there.
203 unsigned CurParen = 1;
205 // Otherwise, there is at least one regex piece. Build up the regex pattern
206 // by escaping scary characters in fixed strings, building up one big regex.
207 while (!PatternStr.empty()) {
209 if (PatternStr.startswith("{{")) {
210 // This is the start of a regex match. Scan for the }}.
211 size_t End = PatternStr.find("}}");
212 if (End == StringRef::npos) {
213 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
215 "found start of regex string with no end '}}'");
219 // Enclose {{}} patterns in parens just like [[]] even though we're not
220 // capturing the result for any purpose. This is required in case the
221 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
222 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
// The regex text is what lies between the "{{" and the "}}".
226 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
// Continue after the closing "}}".
230 PatternStr = PatternStr.substr(End+2);
234 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
235 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236 // second form is [[foo]] which is a reference to foo. The variable name
237 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
238 // it. This is to catch some common errors.
239 if (PatternStr.startswith("[[")) {
240 // Find the closing bracket pair ending the match. End is going to be an
241 // offset relative to the beginning of the match string.
242 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
244 if (End == StringRef::npos) {
245 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
247 "invalid named regex reference, no ]] found");
// MatchStr is the text between the brackets; End+4 skips it together with
// the two-character opening and closing sequences.
251 StringRef MatchStr = PatternStr.substr(2, End);
252 PatternStr = PatternStr.substr(End+4);
254 // Get the regex name (e.g. "foo").
255 size_t NameEnd = MatchStr.find(':');
256 StringRef Name = MatchStr.substr(0, NameEnd);
259 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260 "invalid name in named regex: empty name");
264 // Verify that the name/expression is well formed. FileCheck currently
265 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266 // is relaxed; a stricter check is performed in \c EvaluateExpression.
267 bool IsExpression = false;
268 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269 if (i == 0 && Name[i] == '@') {
// A leading '@' together with a ':' (NameEnd found) is rejected as a
// definition.
270 if (NameEnd != StringRef::npos) {
271 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
273 "invalid name in named regex definition");
// Only '_' and alphanumerics are accepted; '+' and '-' are additionally
// accepted when IsExpression is set.
279 if (Name[i] != '_' && !isalnum(Name[i]) &&
280 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
282 SourceMgr::DK_Error, "invalid name in named regex");
287 // Name can't start with a digit.
288 if (isdigit(static_cast<unsigned char>(Name[0]))) {
289 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290 "invalid name in named regex");
// No ':' was found in MatchStr, so this is the [[foo]] reference form.
295 if (NameEnd == StringRef::npos) {
296 // Handle variables that were defined earlier on the same line by
297 // emitting a backreference.
298 if (VariableDefs.find(Name) != VariableDefs.end()) {
299 unsigned VarParenNum = VariableDefs[Name];
300 if (VarParenNum < 1 || VarParenNum > 9) {
301 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
303 "Can't back-reference more than 9 variables");
306 AddBackrefToRegEx(VarParenNum);
// Record the use together with the current length of RegExStr, i.e. the
// offset at which Match() inserts the variable's value.
308 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
313 // Handle [[foo:.*]].
314 VariableDefs[Name] = CurParen;
318 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
324 // Handle fixed string matches.
325 // Find the end, which is the start of the next regex.
326 size_t FixedMatchEnd = PatternStr.find("{{");
327 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
328 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
329 PatternStr = PatternStr.substr(FixedMatchEnd);
/// Append the regex RS to RegExStr and advance CurParen by the number of
/// matches the regex reports. If the regex is not valid, an "invalid regex"
/// error is printed at the location of RS.
335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
339 if (!R.isValid(Error)) {
340 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
341 "invalid regex: " + Error);
345 RegExStr += RS.str();
346 CurParen += R.getNumMatches();
/// Build the backreference text for capture group BackrefNum: a backslash
/// followed by a single digit. BackrefNum must be in 1..9 (asserted), which
/// keeps the digit computation within '1'..'9'.
350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352 std::string Backref = std::string("\\") +
353 std::string(1, '0' + BackrefNum);
/// Evaluate a "@LINE" expression with an optional signed offset and store the
/// result, rendered as decimal text, in Value.
357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358 // The only supported expression is @LINE([\+-]\d+)?
359 if (!Expr.startswith("@LINE"))
// Strip the "@LINE" keyword; what remains is the optional offset.
361 Expr = Expr.substr(StringRef("@LINE").size());
365 Expr = Expr.substr(1);
366 else if (Expr[0] != '-')
// Parse the remaining text as a base-10 integer.
368 if (Expr.getAsInteger(10, Offset))
// The value is this pattern's line number adjusted by the parsed offset.
371 Value = llvm::itostr(LineNumber + Offset);
375 /// Match - Match the pattern string against the input buffer Buffer. This
376 /// returns the position that is matched or npos if there is no match. If
377 /// there is a match, the size of the matched string is returned in MatchLen.
/// Values captured for the variables in VariableDefs are written to
/// VariableTable after a successful regex match.
378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379 StringMap<StringRef> &VariableTable) const {
380 // If this is the EOF pattern, match it immediately.
381 if (CheckTy == Check::CheckEOF) {
383 return Buffer.size();
386 // If this is a fixed string pattern, just match it now.
387 if (!FixedStr.empty()) {
388 MatchLen = FixedStr.size();
389 return Buffer.find(FixedStr);
394 // If there are variable uses, we need to create a temporary string with the
// variable values inserted.
396 StringRef RegExToMatch = RegExStr;
398 if (!VariableUses.empty()) {
// Running total of the characters inserted so far; it shifts the offsets
// recorded in VariableUses.
401 unsigned InsertOffset = 0;
402 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
// Names starting with '@' are expressions rather than table lookups.
405 if (VariableUses[i].first[0] == '@') {
406 if (!EvaluateExpression(VariableUses[i].first, Value))
407 return StringRef::npos;
409 StringMap<StringRef>::iterator it =
410 VariableTable.find(VariableUses[i].first);
411 // If the variable is undefined, return an error.
412 if (it == VariableTable.end())
413 return StringRef::npos;
415 // Look up the value and escape it so that we can put it into the regex.
416 Value += Regex::escape(it->second);
419 // Plop it into the regex at the adjusted offset.
420 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
421 Value.begin(), Value.end());
422 InsertOffset += Value.size();
425 // Match the newly constructed regex.
426 RegExToMatch = TmpStr;
430 SmallVector<StringRef, 4> MatchInfo;
431 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
432 return StringRef::npos;
434 // Successful regex match.
435 assert(!MatchInfo.empty() && "Didn't get any match");
// Entry 0 is the whole match.
436 StringRef FullMatch = MatchInfo[0];
438 // If this defines any variables, remember their values.
439 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
440 E = VariableDefs.end();
442 assert(I->second < MatchInfo.size() && "Internal paren error");
443 VariableTable[I->first] = MatchInfo[I->second];
// The match position is the offset of the full match from the buffer start.
446 MatchLen = FullMatch.size();
447 return FullMatch.data()-Buffer.data();
/// Return the edit distance between the start of Buffer (at most one line,
/// and at most as many characters as the example string) and an example
/// string: FixedStr when it is non-empty, otherwise the raw RegExStr.
450 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
451 const StringMap<StringRef> &VariableTable) const {
452 // Just compute the number of matching characters. For regular expressions, we
453 // just compare against the regex itself and hope for the best.
455 // FIXME: One easy improvement here is have the regex lib generate a single
456 // example regular expression which matches, and use that as the example
// string.
458 StringRef ExampleString(FixedStr);
459 if (ExampleString.empty())
460 ExampleString = RegExStr;
462 // Only compare up to the first line in the buffer, or the string size.
463 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
464 BufferPrefix = BufferPrefix.split('\n').first;
465 return BufferPrefix.edit_distance(ExampleString);
/// Emit notes that help explain a failed match: one note per variable or
/// expression used by the pattern, followed by a "possible intended match
/// here" note when a sufficiently close fuzzy match is found in Buffer.
468 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
469 const StringMap<StringRef> &VariableTable) const{
470 // If this was a regular expression using variables, print the current
// values.
472 if (!VariableUses.empty()) {
473 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
474 SmallString<256> Msg;
475 raw_svector_ostream OS(Msg);
476 StringRef Var = VariableUses[i].first;
479 if (EvaluateExpression(Var, Value)) {
480 OS << "with expression \"";
481 OS.write_escaped(Var) << "\" equal to \"";
482 OS.write_escaped(Value) << "\"";
484 OS << "uses incorrect expression \"";
485 OS.write_escaped(Var) << "\"";
488 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
490 // Check for undefined variable references.
491 if (it == VariableTable.end()) {
492 OS << "uses undefined variable \"";
493 OS.write_escaped(Var) << "\"";
495 OS << "with variable \"";
496 OS.write_escaped(Var) << "\" equal to \"";
497 OS.write_escaped(it->second) << "\"";
501 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
506 // Attempt to find the closest/best fuzzy match. Usually an error happens
507 // because some string in the output didn't exactly match. In these cases, we
508 // would like to show the user a best guess at what "should have" matched, to
509 // save them having to actually check the input manually.
510 size_t NumLinesForward = 0;
511 size_t Best = StringRef::npos;
512 double BestQuality = 0;
514 // Use an arbitrary 4k limit on how far we will search.
515 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
516 if (Buffer[i] == '\n')
519 // Patterns have leading whitespace stripped, so skip whitespace when
520 // looking for something which looks like a pattern.
521 if (Buffer[i] == ' ' || Buffer[i] == '\t')
524 // Compute the "quality" of this match as an arbitrary combination of the
525 // match distance and the number of lines skipped to get to this match.
526 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
527 double Quality = Distance + (NumLinesForward / 100.);
// Lower quality values are better; the first candidate is always accepted.
529 if (Quality < BestQuality || Best == StringRef::npos) {
531 BestQuality = Quality;
535 // Print the "possible intended match here" line if we found something
536 // reasonable and not equal to what we showed in the "scanning from here"
// line.
// Best == 0 is the start of the scanned buffer itself; npos means no
// candidate was recorded.
538 if (Best && Best != StringRef::npos && BestQuality < 50) {
539 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
540 SourceMgr::DK_Note, "possible intended match here");
542 // FIXME: If we wanted to be really friendly we would show why the match
543 // failed, as it can be hard to spot simple one character differences.
/// Scan Str for the "]]" that closes a regex variable use or definition,
/// honouring backslash escapes and the nesting depth of [...] groups.
/// Returns npos when the scan reaches the end of Str without finding it.
547 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
548 // Offset keeps track of the current offset within the input Str
550 // [...] Nesting depth
551 size_t BracketDepth = 0;
553 while (!Str.empty()) {
// "]]" only terminates the variable when no [...] group is open.
554 if (Str.startswith("]]") && BracketDepth == 0)
556 if (Str[0] == '\\') {
557 // Backslash escapes the next char within regexes, so skip them both.
568 if (BracketDepth == 0) {
569 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
571 "missing closing \"]\" for regex variable");
582 return StringRef::npos;
586 //===----------------------------------------------------------------------===//
588 //===----------------------------------------------------------------------===//
590 /// CheckString - This is a check that we found in the input file.
592 /// Pat - The pattern to match.
595 /// Prefix - Which prefix name this check matched.
598 /// Loc - The location in the match file that the check string was specified.
601 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
602 /// as opposed to a CHECK: directive.
603 Check::CheckType CheckTy;
605 /// DagNotStrings - These are all of the strings that are disallowed from
606 /// occurring between this match string and the previous one (or start of
/// file).
608 std::vector<Pattern> DagNotStrings;
/// Construct a check string; the trailing arguments initialize Pat, Prefix,
/// Loc and CheckTy in that order.
611 CheckString(const Pattern &P,
615 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
617 /// Check - Match check string and its "not strings" and/or "dag strings".
618 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
619 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
621 /// CheckNext - Verify there is a single line in the given buffer.
622 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
624 /// CheckSame - Verify there is no newline in the given buffer.
625 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
627 /// CheckNot - Verify there are no "not strings" in the given buffer.
628 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
629 const std::vector<const Pattern *> &NotStrings,
630 StringMap<StringRef> &VariableTable) const;
632 /// CheckDag - Match "dag strings" and their mixed "not strings".
633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634 std::vector<const Pattern *> &NotStrings,
635 StringMap<StringRef> &VariableTable) const;
638 /// Canonicalize whitespace in the input file. Line endings are replaced
639 /// with UNIX-style '\n'.
641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642 /// characters to a single space.
/// \return a new MemoryBuffer holding a copy of the canonicalized text, with
/// the same buffer identifier as \p MB.
643 static std::unique_ptr<MemoryBuffer>
644 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
645 bool PreserveHorizontal) {
646 SmallString<128> NewFile;
// Reserve room for the whole input up front.
647 NewFile.reserve(MB->getBufferSize());
649 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
651 // Eliminate trailing dosish \r.
// The Ptr <= End - 2 test guarantees that Ptr[1] is inside the buffer.
652 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
656 // If current char is not a horizontal whitespace or if horizontal
657 // whitespace canonicalization is disabled, dump it to output as is.
658 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
659 NewFile.push_back(*Ptr);
663 // Otherwise, add one space and advance over neighboring space.
664 NewFile.push_back(' ');
665 while (Ptr+1 != End &&
666 (Ptr[1] == ' ' || Ptr[1] == '\t'))
670 return std::unique_ptr<MemoryBuffer>(
671 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
/// Return true if \p c may be part of a word for the purposes of prefix
/// matching: an alphanumeric character (as classified by isalnum), '-' or
/// '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF, and plain char may be signed, so convert through
  // unsigned char first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
678 // Get the size of the prefix extension.
// That is the length of the text that follows the prefix for the given check
// type, e.g. ":" or "-NEXT:". sizeof on a string literal counts the
// terminating NUL, hence the "- 1" in every case.
679 static size_t CheckTypeSize(Check::CheckType Ty) {
681 case Check::CheckNone:
684 case Check::CheckPlain:
685 return sizeof(":") - 1;
687 case Check::CheckNext:
688 return sizeof("-NEXT:") - 1;
690 case Check::CheckSame:
691 return sizeof("-SAME:") - 1;
693 case Check::CheckNot:
694 return sizeof("-NOT:") - 1;
696 case Check::CheckDAG:
697 return sizeof("-DAG:") - 1;
699 case Check::CheckLabel:
700 return sizeof("-LABEL:") - 1;
// The EOF check type has no textual form, so asking for its size is a bug.
702 case Check::CheckEOF:
703 llvm_unreachable("Should not be using EOF size");
706 llvm_unreachable("Bad check type");
// Classify the check directive at the start of Buffer, which begins with
// Prefix. The character right after the prefix is inspected first, then the
// keyword that follows it. Check::CheckNone is returned when no known form
// matches.
709 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
710 char NextChar = Buffer[Prefix.size()];
712 // Verify that the : is present after the prefix.
714 return Check::CheckPlain;
717 return Check::CheckNone;
// Skip the prefix and the single character that follows it.
719 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
720 if (Rest.startswith("NEXT:"))
721 return Check::CheckNext;
723 if (Rest.startswith("SAME:"))
724 return Check::CheckSame;
726 if (Rest.startswith("NOT:"))
727 return Check::CheckNot;
729 if (Rest.startswith("DAG:"))
730 return Check::CheckDAG;
732 if (Rest.startswith("LABEL:"))
733 return Check::CheckLabel;
735 return Check::CheckNone;
738 // From the given position, find the next character after the word.
// Word characters are those accepted by IsPartOfWord; the scan stops at the
// end of Str at the latest.
739 static size_t SkipWord(StringRef Str, size_t Loc) {
740 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
745 // Try to find the first match in buffer for any prefix. If a valid match is
746 // found, return that prefix and set its type and location. If there are almost
747 // matches (e.g. the actual prefix string is found, but is not an actual check
748 // string), but no valid match, return an empty string and set the position to
749 // resume searching from. If no partial matches are found, return an empty
750 // string and the location will be StringRef::npos. If one prefix is a substring
751 // of another, the maximal match should be found. e.g. if "A" and "AA" are
752 // prefixes then AA-CHECK: should match the second one.
753 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
754 Check::CheckType &CheckTy,
// Best candidate seen so far: its prefix, location and check type.
756 StringRef FirstPrefix;
757 size_t FirstLoc = StringRef::npos;
758 size_t SearchLoc = StringRef::npos;
759 Check::CheckType FirstTy = Check::CheckNone;
// Start from the "nothing found" result.
761 CheckTy = Check::CheckNone;
762 CheckLoc = StringRef::npos;
764 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
766 StringRef Prefix(*I);
767 size_t PrefixLoc = Buffer.find(Prefix);
769 if (PrefixLoc == StringRef::npos)
772 // Track where we are searching for invalid prefixes that look almost right.
773 // We need to only advance to the first partial match on the next attempt
774 // since a partial match could be a substring of a later, valid prefix.
775 // Need to skip to the end of the word, otherwise we could end up
776 // matching a prefix in a substring later.
777 if (PrefixLoc < SearchLoc)
778 SearchLoc = SkipWord(Buffer, PrefixLoc);
780 // We only want to find the first match to avoid skipping some.
781 if (PrefixLoc > FirstLoc)
783 // If one matching check-prefix is a prefix of another, choose the
// longer one.
785 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
788 StringRef Rest = Buffer.drop_front(PrefixLoc);
789 // Make sure we have actually found the prefix, and not a word containing
790 // it. This should also prevent matching the wrong prefix when one is a
791 // substring of another.
792 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
793 FirstTy = Check::CheckNone;
795 FirstTy = FindCheckType(Rest, Prefix);
797 FirstLoc = PrefixLoc;
798 FirstPrefix = Prefix;
801 // If the first prefix is invalid, we should continue the search after it.
802 if (FirstTy == Check::CheckNone) {
803 CheckLoc = SearchLoc;
// Repeatedly call FindFirstCandidateMatch, advancing Buffer past near-misses,
// until a real prefix match is found or no candidates remain. LineNumber is
// increased by the number of newlines that are skipped.
812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813 unsigned &LineNumber,
814 Check::CheckType &CheckTy,
816 while (!Buffer.empty()) {
817 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818 // If we found a real match, we are done.
819 if (!Prefix.empty()) {
// Count the newlines that precede the match.
820 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
824 // We didn't find any almost matches either, we are also done.
825 if (CheckLoc == StringRef::npos)
// Count the newlines in the region that is about to be dropped.
828 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
830 // Advance to the last possible match we found and try again.
831 Buffer = Buffer.drop_front(CheckLoc + 1);
837 /// ReadCheckFile - Read the check file, which specifies the sequence of
838 /// expected strings. The strings are added to the CheckStrings vector.
839 /// Returns true in case of an error, false otherwise.
840 static bool ReadCheckFile(SourceMgr &SM,
841 std::vector<CheckString> &CheckStrings) {
842 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
843 MemoryBuffer::getFileOrSTDIN(CheckFilename);
844 if (std::error_code EC = FileOrErr.getError()) {
845 errs() << "Could not open check file '" << CheckFilename
846 << "': " << EC.message() << '\n';
850 // If we want to canonicalize whitespace, strip excess whitespace from the
851 // buffer containing the CHECK lines. Remove DOS style line endings.
852 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
853 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
855 // Find all instances of CheckPrefix followed by : in the file.
// Buffer is taken before F is moved into the source manager.
856 StringRef Buffer = F->getBuffer();
858 SM.AddNewSourceBuffer(std::move(F), SMLoc());
// One CHECK-NOT pattern is built for every ImplicitCheckNot entry.
860 std::vector<Pattern> ImplicitNegativeChecks;
861 for (const auto &PatternString : ImplicitCheckNot) {
862 // Create a buffer with fake command line content in order to display the
863 // command line option responsible for the specific implicit CHECK-NOT.
864 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
865 std::string Suffix = "'";
866 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
867 Prefix + PatternString + Suffix, "command line");
// The pattern text is the slice of the fake buffer between Prefix and Suffix.
869 StringRef PatternInBuffer =
870 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
871 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
873 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
874 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
875 "IMPLICIT-CHECK", SM, 0);
// Pending CHECK-DAG/CHECK-NOT patterns, seeded with the implicit checks.
879 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
881 // LineNumber keeps track of the line on which CheckPrefix instances are
// found.
883 unsigned LineNumber = 1;
886 Check::CheckType CheckTy;
889 // See if a prefix occurs in the memory buffer.
890 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
894 if (UsedPrefix.empty())
897 Buffer = Buffer.drop_front(PrefixLoc);
899 // Location to use for error messages.
900 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
902 // PrefixLoc is to the start of the prefix. Skip to the end.
903 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
905 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906 // leading and trailing whitespace.
907 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
909 // Scan ahead to the end of line.
910 size_t EOL = Buffer.find_first_of("\n\r");
912 // Remember the location of the start of the pattern, for diagnostics.
913 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
915 // Parse the pattern.
917 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
920 // Verify that CHECK-LABEL lines do not define or use variables
921 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
924 "found '" + UsedPrefix + "-LABEL:'"
925 " with variable definition or use");
// Continue scanning from the end of the current line.
929 Buffer = Buffer.substr(EOL);
931 // Verify that CHECK-NEXT and CHECK-SAME lines have at least one CHECK line
// before them.
932 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
933 CheckStrings.empty()) {
934 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
935 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
937 "found '" + UsedPrefix + "-" + Type + "' without previous '"
938 + UsedPrefix + ": line");
942 // Handle CHECK-DAG/-NOT.
943 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
944 DagNotMatches.push_back(P);
948 // Okay, add the string we captured to the output vector and move on.
949 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy);
// Hand the pending DAG/NOT patterns to the new check string, then start the
// next batch from the implicit checks again.
950 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
951 DagNotMatches = ImplicitNegativeChecks;
954 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
955 // prefix as a filler for the error message.
956 if (!DagNotMatches.empty()) {
957 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
958 SMLoc::getFromPointer(Buffer.data()),
960 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
// Report an error that lists every prefix when no check string was found.
963 if (CheckStrings.empty()) {
964 errs() << "error: no check strings found with prefix"
965 << (CheckPrefixes.size() > 1 ? "es " : " ");
966 prefix_iterator I = CheckPrefixes.begin();
967 prefix_iterator E = CheckPrefixes.end();
969 errs() << "\'" << *I << ":'";
973 errs() << ", \'" << *I << ":'";
// Report that the pattern Pat, specified at Loc, was not found in Buffer: an
// error at Loc, a "scanning from here" note at the first non-whitespace
// position of Buffer, then any extra notes the pattern provides.
982 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
983 const Pattern &Pat, StringRef Buffer,
984 StringMap<StringRef> &VariableTable) {
985 // Otherwise, we have an error, emit an error message.
986 SM.PrintMessage(Loc, SourceMgr::DK_Error,
987 "expected string not found in input");
989 // Print the "scanning from here" line. If the current position is at the
990 // end of a line, advance to the start of the next line.
991 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
993 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
994 "scanning from here");
996 // Allow the pattern to print additional information if desired.
997 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
// Convenience overload: report a failure for CheckStr using its own location
// and pattern.
1000 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1002 StringMap<StringRef> &VariableTable) {
1003 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1006 /// CountNumNewlinesBetween - Count the number of newlines in the specified
/// range.
/// A "\r\n" or "\n\r" pair counts as a single newline. FirstNewLine is set to
/// the position right after the first newline.
1008 static unsigned CountNumNewlinesBetween(StringRef Range,
1009 const char *&FirstNewLine) {
1010 unsigned NumNewLines = 0;
1012 // Scan for newline.
1013 Range = Range.substr(Range.find_first_of("\n\r"));
1014 if (Range.empty()) return NumNewLines;
1018 // Handle \n\r and \r\n as a single newline.
1019 if (Range.size() > 1 &&
1020 (Range[1] == '\n' || Range[1] == '\r') &&
1021 (Range[0] != Range[1]))
1022 Range = Range.substr(1);
1023 Range = Range.substr(1);
1025 if (NumNewLines == 1)
1026 FirstNewLine = Range.begin();
/// Match this check string against Buffer. Outside label-scan mode the
/// CHECK-DAG strings are matched first, and the region skipped before the
/// main match is then validated against the CHECK-NEXT, CHECK-SAME and
/// CHECK-NOT constraints. Returns the match position relative to Buffer, or
/// StringRef::npos on failure.
1030 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1031 bool IsLabelScanMode, size_t &MatchLen,
1032 StringMap<StringRef> &VariableTable) const {
1034 std::vector<const Pattern *> NotStrings;
1036 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1037 // bounds; we have not processed variable definitions within the bounded block
1038 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1039 // over the block again (including the last CHECK-LABEL) in normal mode.
1040 if (!IsLabelScanMode) {
1041 // Match "dag strings" (with mixed "not strings" if any).
1042 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1043 if (LastPos == StringRef::npos)
1044 return StringRef::npos;
1047 // Match itself from the last position after matching CHECK-DAG.
1048 StringRef MatchBuffer = Buffer.substr(LastPos);
1049 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1050 if (MatchPos == StringRef::npos) {
1051 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1052 return StringRef::npos;
1055 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
// CHECK-SAME or CHECK-NOT.
1057 if (!IsLabelScanMode) {
// The text between LastPos and the start of this match.
1058 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1060 // If this check is a "CHECK-NEXT", verify that the previous match was on
1061 // the previous line (i.e. that there is one newline between them).
1062 if (CheckNext(SM, SkippedRegion))
1063 return StringRef::npos;
1065 // If this check is a "CHECK-SAME", verify that the previous match was on
1066 // the same line (i.e. that there is no newline between them).
1067 if (CheckSame(SM, SkippedRegion))
1068 return StringRef::npos;
1070 // If this match had "not strings", verify that they don't exist in the
// skipped region.
1072 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1073 return StringRef::npos;
// MatchPos is relative to MatchBuffer, which starts at LastPos.
1076 return LastPos + MatchPos;
1079 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1080 if (CheckTy != Check::CheckNext)
1083 // Count the number of newlines between the previous match and this one.
1084 assert(Buffer.data() !=
1086 SM.FindBufferContainingLoc(
1087 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1088 "CHECK-NEXT can't be the first check in a file");
1090 const char *FirstNewLine = nullptr;
1091 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1093 if (NumNewLines == 0) {
1094 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1095 "-NEXT: is on the same line as previous match");
1096 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1097 SourceMgr::DK_Note, "'next' match was here");
1098 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1099 "previous match ended here");
1103 if (NumNewLines != 1) {
1104 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1105 "-NEXT: is not on the line after the previous match");
1106 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1107 SourceMgr::DK_Note, "'next' match was here");
1108 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1109 "previous match ended here");
1110 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1111 "non-matching line after previous match is here");
1118 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1119 if (CheckTy != Check::CheckSame)
1122 // Count the number of newlines between the previous match and this one.
1123 assert(Buffer.data() !=
1124 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1125 SMLoc::getFromPointer(Buffer.data())))
1126 ->getBufferStart() &&
1127 "CHECK-SAME can't be the first check in a file");
1129 const char *FirstNewLine = nullptr;
1130 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1132 if (NumNewLines != 0) {
1133 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1135 "-SAME: is not on the same line as the previous match");
1136 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1137 "'next' match was here");
1138 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1139 "previous match ended here");
1146 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1147 const std::vector<const Pattern *> &NotStrings,
1148 StringMap<StringRef> &VariableTable) const {
1149 for (unsigned ChunkNo = 0, e = NotStrings.size();
1150 ChunkNo != e; ++ChunkNo) {
1151 const Pattern *Pat = NotStrings[ChunkNo];
1152 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1154 size_t MatchLen = 0;
1155 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1157 if (Pos == StringRef::npos) continue;
1159 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1160 SourceMgr::DK_Error,
1161 Prefix + "-NOT: string occurred!");
1162 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1163 Prefix + "-NOT: pattern specified here");
1170 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1171 std::vector<const Pattern *> &NotStrings,
1172 StringMap<StringRef> &VariableTable) const {
1173 if (DagNotStrings.empty())
1177 size_t StartPos = LastPos;
1179 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1180 ChunkNo != e; ++ChunkNo) {
1181 const Pattern &Pat = DagNotStrings[ChunkNo];
1183 assert((Pat.getCheckTy() == Check::CheckDAG ||
1184 Pat.getCheckTy() == Check::CheckNot) &&
1185 "Invalid CHECK-DAG or CHECK-NOT!");
1187 if (Pat.getCheckTy() == Check::CheckNot) {
1188 NotStrings.push_back(&Pat);
1192 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1194 size_t MatchLen = 0, MatchPos;
1196 // CHECK-DAG always matches from the start.
1197 StringRef MatchBuffer = Buffer.substr(StartPos);
1198 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1199 // With a group of CHECK-DAGs, a single mismatching means the match on
1200 // that group of CHECK-DAGs fails immediately.
1201 if (MatchPos == StringRef::npos) {
1202 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1203 return StringRef::npos;
1205 // Re-calc it as the offset relative to the start of the original string.
1206 MatchPos += StartPos;
1208 if (!NotStrings.empty()) {
1209 if (MatchPos < LastPos) {
1211 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1212 SourceMgr::DK_Error,
1213 Prefix + "-DAG: found a match of CHECK-DAG"
1214 " reordering across a CHECK-NOT");
1215 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1217 Prefix + "-DAG: the farthest match of CHECK-DAG"
1219 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1220 Prefix + "-NOT: the crossed pattern specified"
1222 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1223 Prefix + "-DAG: the reordered pattern specified"
1225 return StringRef::npos;
1227 // All subsequent CHECK-DAGs should be matched from the farthest
1228 // position of all precedent CHECK-DAGs (including this one.)
1230 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1231 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1233 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1234 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1235 return StringRef::npos;
1236 // Clear "not strings".
1240 // Update the last position with CHECK-DAG matches.
1241 LastPos = std::max(MatchPos + MatchLen, LastPos);
1247 // A check prefix must contain only alphanumeric, hyphens and underscores.
1248 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1249 Regex Validator("^[a-zA-Z0-9_-]*$");
1250 return Validator.match(CheckPrefix);
1253 static bool ValidateCheckPrefixes() {
1254 StringSet<> PrefixSet;
1256 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1258 StringRef Prefix(*I);
1260 // Reject empty prefixes.
1264 if (!PrefixSet.insert(Prefix).second)
1267 if (!ValidateCheckPrefix(Prefix))
1274 // I don't think there's a way to specify an initial value for cl::list,
1275 // so if nothing was specified, add the default
1276 static void AddCheckPrefixIfNeeded() {
1277 if (CheckPrefixes.empty())
1278 CheckPrefixes.push_back("CHECK");
1281 int main(int argc, char **argv) {
1282 sys::PrintStackTraceOnErrorSignal();
1283 PrettyStackTraceProgram X(argc, argv);
1284 cl::ParseCommandLineOptions(argc, argv);
1286 if (!ValidateCheckPrefixes()) {
1287 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1288 "start with a letter and contain only alphanumeric characters, "
1289 "hyphens and underscores\n";
1293 AddCheckPrefixIfNeeded();
1297 // Read the expected strings from the check file.
1298 std::vector<CheckString> CheckStrings;
1299 if (ReadCheckFile(SM, CheckStrings))
1302 // Open the file to check and add it to SourceMgr.
1303 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1304 MemoryBuffer::getFileOrSTDIN(InputFilename);
1305 if (std::error_code EC = FileOrErr.getError()) {
1306 errs() << "Could not open input file '" << InputFilename
1307 << "': " << EC.message() << '\n';
1310 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1312 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1313 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1317 // Remove duplicate spaces in the input file if requested.
1318 // Remove DOS style line endings.
1319 std::unique_ptr<MemoryBuffer> F =
1320 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1322 // Check that we have all of the expected strings, in order, in the input
1324 StringRef Buffer = F->getBuffer();
1326 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1328 /// VariableTable - This holds all the current filecheck variables.
1329 StringMap<StringRef> VariableTable;
1331 bool hasError = false;
1333 unsigned i = 0, j = 0, e = CheckStrings.size();
1336 StringRef CheckRegion;
1338 CheckRegion = Buffer;
1340 const CheckString &CheckLabelStr = CheckStrings[j];
1341 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1346 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1347 size_t MatchLabelLen = 0;
1348 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1349 MatchLabelLen, VariableTable);
1350 if (MatchLabelPos == StringRef::npos) {
1355 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1356 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1360 for ( ; i != j; ++i) {
1361 const CheckString &CheckStr = CheckStrings[i];
1363 // Check each string within the scanned region, including a second check
1364 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1365 size_t MatchLen = 0;
1366 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1369 if (MatchPos == StringRef::npos) {
1375 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1382 return hasError ? 1 : 0;