From: Douglas Gregor Date: Wed, 2 May 2012 17:32:48 +0000 (+0000) Subject: Move llvm-tblgen's StringMatcher into the TableGen library so it can X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f657da2e4896732f306a9e62261418112e7337ce;p=oota-llvm.git Move llvm-tblgen's StringMatcher into the TableGen library so it can be used by clang-tblgen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156000 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h new file mode 100644 index 00000000000..1dadc76200b --- /dev/null +++ b/include/llvm/TableGen/StringMatcher.h @@ -0,0 +1,54 @@ +//===- StringMatcher.h - Generate a matcher for input strings ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringMatcher class. +// +//===----------------------------------------------------------------------===// + +#ifndef STRINGMATCHER_H +#define STRINGMATCHER_H + +#include +#include +#include +#include "llvm/ADT/StringRef.h" + +namespace llvm { + class raw_ostream; + +/// StringMatcher - Given a list of strings and code to execute when they match, +/// output a simple switch tree to classify the input string. +/// +/// If a match is found, the code in Vals[i].second is executed; control must +/// not exit this code fragment. If nothing matches, execution falls through. +/// +class StringMatcher { +public: + typedef std::pair StringPair; +private: + StringRef StrVariableName; + const std::vector &Matches; + raw_ostream &OS; + +public: + StringMatcher(StringRef strVariableName, + const std::vector &matches, raw_ostream &os) + : StrVariableName(strVariableName), Matches(matches), OS(os) {} + + void Emit(unsigned Indent = 0) const; + + +private: + bool EmitStringMatcherForChar(const std::vector &Matches, + unsigned CharNo, unsigned IndentCount) const; +}; + +} // end llvm namespace. + +#endif diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 82f72b03eef..ba7bf14e5dc 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMTableGen Error.cpp Main.cpp Record.cpp + StringMatcher.cpp TableGenAction.cpp TableGenBackend.cpp TGLexer.cpp diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp new file mode 100644 index 00000000000..0ce626d4b19 --- /dev/null +++ b/lib/TableGen/StringMatcher.cpp @@ -0,0 +1,149 @@ +//===- StringMatcher.cpp - Generate a matcher for input strings -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringMatcher class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/StringMatcher.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +/// FindFirstNonCommonLetter - Find the first character in the keys of the +/// string pairs that is not shared across the whole set of strings. All +/// strings are assumed to have the same length. +static unsigned +FindFirstNonCommonLetter(const std::vector &Matches) { + assert(!Matches.empty()); + for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { + // Check to see if letter i is the same across the set. + char Letter = Matches[0]->first[i]; + + for (unsigned str = 0, e = Matches.size(); str != e; ++str) + if (Matches[str]->first[i] != Letter) + return i; + } + + return Matches[0]->first.size(); +} + +/// EmitStringMatcherForChar - Given a set of strings that are known to be the +/// same length and whose characters leading up to CharNo are the same, emit +/// code to verify that CharNo and later are the same. +/// +/// \return - True if control can leave the emitted code fragment. +bool StringMatcher:: +EmitStringMatcherForChar(const std::vector &Matches, + unsigned CharNo, unsigned IndentCount) const { + assert(!Matches.empty() && "Must have at least one string to match!"); + std::string Indent(IndentCount*2+4, ' '); + + // If we have verified that the entire string matches, we're done: output the + // matching code. + if (CharNo == Matches[0]->first.size()) { + assert(Matches.size() == 1 && "Had duplicate keys to match on"); + + // If the to-execute code has \n's in it, indent each subsequent line. + StringRef Code = Matches[0]->second; + + std::pair Split = Code.split('\n'); + OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; + + Code = Split.second; + while (!Code.empty()) { + Split = Code.split('\n'); + OS << Indent << Split.first << "\n"; + Code = Split.second; + } + return false; + } + + // Bucket the matches by the character we are comparing. + std::map > MatchesByLetter; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); + + + // If we have exactly one bucket to match, see how many characters are common + // across the whole set and match all of them at once. + if (MatchesByLetter.size() == 1) { + unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); + unsigned NumChars = FirstNonCommonLetter-CharNo; + + // Emit code to break out if the prefix doesn't match. + if (NumChars == 1) { + // Do the comparison with if (Str[1] != 'f') + // FIXME: Need to escape general characters. + OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" + << Matches[0]->first[CharNo] << "')\n"; + OS << Indent << " break;\n"; + } else { + // Do the comparison with if (Str.substr(1, 3) != "foo"). + // FIXME: Need to escape general strings. + OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ", " + << NumChars << ") != \""; + OS << Matches[0]->first.substr(CharNo, NumChars) << "\")\n"; + OS << Indent << " break;\n"; + } + + return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); + } + + // Otherwise, we have multiple possible things, emit a switch on the + // character. + OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; + OS << Indent << "default: break;\n"; + + for (std::map >::iterator LI = + MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { + // TODO: escape hard stuff (like \n) if we ever care about it. + OS << Indent << "case '" << LI->first << "':\t // " + << LI->second.size() << " string"; + if (LI->second.size() != 1) OS << 's'; + OS << " to match.\n"; + if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) + OS << Indent << " break;\n"; + } + + OS << Indent << "}\n"; + return true; +} + + +/// Emit - Top level entry point. +/// +void StringMatcher::Emit(unsigned Indent) const { + // If nothing to match, just fall through. + if (Matches.empty()) return; + + // First level categorization: group strings by length. + std::map > MatchesByLength; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); + + // Output a switch statement on length and categorize the elements within each + // bin. + OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; + OS.indent(Indent*2+2) << "default: break;\n"; + + for (std::map >::iterator LI = + MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { + OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " + << LI->second.size() + << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; + if (EmitStringMatcherForChar(LI->second, 0, Indent)) + OS.indent(Indent*2+4) << "break;\n"; + } + + OS.indent(Indent*2+2) << "}\n"; +} diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 837516882ea..0cb86d03d72 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -98,7 +98,6 @@ #include "AsmMatcherEmitter.h" #include "CodeGenTarget.h" -#include "StringMatcher.h" #include "StringToOffsetTable.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PointerUnion.h" @@ -111,6 +110,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringMatcher.h" #include #include using namespace llvm; diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 2b70f1c52bd..c5585f5eaa3 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -27,7 +27,6 @@ add_tablegen(llvm-tblgen LLVM PseudoLoweringEmitter.cpp RegisterInfoEmitter.cpp SetTheory.cpp - StringMatcher.cpp SubtargetEmitter.cpp TGValueTypes.cpp TableGen.cpp diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 8e1bae8c1f0..05ba5d2490e 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -13,8 +13,8 @@ #include "CodeGenTarget.h" #include "IntrinsicEmitter.h" -#include "StringMatcher.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringMatcher.h" #include "llvm/ADT/StringExtras.h" #include using namespace llvm; diff --git a/utils/TableGen/StringMatcher.cpp b/utils/TableGen/StringMatcher.cpp deleted file mode 100644 index 6aedcbf458a..00000000000 --- a/utils/TableGen/StringMatcher.cpp +++ /dev/null @@ -1,149 +0,0 @@ -//===- StringMatcher.cpp - Generate a matcher for input strings -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the StringMatcher class. -// -//===----------------------------------------------------------------------===// - -#include "StringMatcher.h" -#include "llvm/Support/raw_ostream.h" -#include -using namespace llvm; - -/// FindFirstNonCommonLetter - Find the first character in the keys of the -/// string pairs that is not shared across the whole set of strings. All -/// strings are assumed to have the same length. -static unsigned -FindFirstNonCommonLetter(const std::vector &Matches) { - assert(!Matches.empty()); - for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { - // Check to see if letter i is the same across the set. - char Letter = Matches[0]->first[i]; - - for (unsigned str = 0, e = Matches.size(); str != e; ++str) - if (Matches[str]->first[i] != Letter) - return i; - } - - return Matches[0]->first.size(); -} - -/// EmitStringMatcherForChar - Given a set of strings that are known to be the -/// same length and whose characters leading up to CharNo are the same, emit -/// code to verify that CharNo and later are the same. -/// -/// \return - True if control can leave the emitted code fragment. -bool StringMatcher:: -EmitStringMatcherForChar(const std::vector &Matches, - unsigned CharNo, unsigned IndentCount) const { - assert(!Matches.empty() && "Must have at least one string to match!"); - std::string Indent(IndentCount*2+4, ' '); - - // If we have verified that the entire string matches, we're done: output the - // matching code. - if (CharNo == Matches[0]->first.size()) { - assert(Matches.size() == 1 && "Had duplicate keys to match on"); - - // If the to-execute code has \n's in it, indent each subsequent line. - StringRef Code = Matches[0]->second; - - std::pair Split = Code.split('\n'); - OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; - - Code = Split.second; - while (!Code.empty()) { - Split = Code.split('\n'); - OS << Indent << Split.first << "\n"; - Code = Split.second; - } - return false; - } - - // Bucket the matches by the character we are comparing. - std::map > MatchesByLetter; - - for (unsigned i = 0, e = Matches.size(); i != e; ++i) - MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); - - - // If we have exactly one bucket to match, see how many characters are common - // across the whole set and match all of them at once. - if (MatchesByLetter.size() == 1) { - unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); - unsigned NumChars = FirstNonCommonLetter-CharNo; - - // Emit code to break out if the prefix doesn't match. - if (NumChars == 1) { - // Do the comparison with if (Str[1] != 'f') - // FIXME: Need to escape general characters. - OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" - << Matches[0]->first[CharNo] << "')\n"; - OS << Indent << " break;\n"; - } else { - // Do the comparison with if (Str.substr(1, 3) != "foo"). - // FIXME: Need to escape general strings. - OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ", " - << NumChars << ") != \""; - OS << Matches[0]->first.substr(CharNo, NumChars) << "\")\n"; - OS << Indent << " break;\n"; - } - - return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); - } - - // Otherwise, we have multiple possible things, emit a switch on the - // character. - OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; - OS << Indent << "default: break;\n"; - - for (std::map >::iterator LI = - MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { - // TODO: escape hard stuff (like \n) if we ever care about it. - OS << Indent << "case '" << LI->first << "':\t // " - << LI->second.size() << " string"; - if (LI->second.size() != 1) OS << 's'; - OS << " to match.\n"; - if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) - OS << Indent << " break;\n"; - } - - OS << Indent << "}\n"; - return true; -} - - -/// Emit - Top level entry point. -/// -void StringMatcher::Emit(unsigned Indent) const { - // If nothing to match, just fall through. - if (Matches.empty()) return; - - // First level categorization: group strings by length. - std::map > MatchesByLength; - - for (unsigned i = 0, e = Matches.size(); i != e; ++i) - MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); - - // Output a switch statement on length and categorize the elements within each - // bin. - OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; - OS.indent(Indent*2+2) << "default: break;\n"; - - for (std::map >::iterator LI = - MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { - OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " - << LI->second.size() - << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; - if (EmitStringMatcherForChar(LI->second, 0, Indent)) - OS.indent(Indent*2+4) << "break;\n"; - } - - OS.indent(Indent*2+2) << "}\n"; -} diff --git a/utils/TableGen/StringMatcher.h b/utils/TableGen/StringMatcher.h deleted file mode 100644 index 1dadc76200b..00000000000 --- a/utils/TableGen/StringMatcher.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- StringMatcher.h - Generate a matcher for input strings ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the StringMatcher class. -// -//===----------------------------------------------------------------------===// - -#ifndef STRINGMATCHER_H -#define STRINGMATCHER_H - -#include -#include -#include -#include "llvm/ADT/StringRef.h" - -namespace llvm { - class raw_ostream; - -/// StringMatcher - Given a list of strings and code to execute when they match, -/// output a simple switch tree to classify the input string. -/// -/// If a match is found, the code in Vals[i].second is executed; control must -/// not exit this code fragment. If nothing matches, execution falls through. -/// -class StringMatcher { -public: - typedef std::pair StringPair; -private: - StringRef StrVariableName; - const std::vector &Matches; - raw_ostream &OS; - -public: - StringMatcher(StringRef strVariableName, - const std::vector &matches, raw_ostream &os) - : StrVariableName(strVariableName), Matches(matches), OS(os) {} - - void Emit(unsigned Indent = 0) const; - - -private: - bool EmitStringMatcherForChar(const std::vector &Matches, - unsigned CharNo, unsigned IndentCount) const; -}; - -} // end llvm namespace. - -#endif