X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=utils%2FTableGen%2FTGLexer.cpp;h=82d2b6491aaca36ec8bd6659e312214a9eef127c;hb=8b8515c225c799e9df69bde8ffffa3c72cec9445;hp=bd12f2c38eb293eed61117b2261c75a39ffd2240;hpb=c1819188b6c971b91c680a9a3c077b84a110e5fd;p=oota-llvm.git diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp index bd12f2c38eb..82d2b6491aa 100644 --- a/utils/TableGen/TGLexer.cpp +++ b/utils/TableGen/TGLexer.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Chris Lattner and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -12,52 +12,53 @@ //===----------------------------------------------------------------------===// #include "TGLexer.h" -#include "Record.h" -#include "llvm/Support/Streams.h" -#include "Record.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" -typedef std::pair*> SubClassRefTy; -#include "FileParser.h" +#include "llvm/Config/config.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include +#include +#include +#include +#include using namespace llvm; -// FIXME: REMOVE THIS. -#define YYEOF 0 -#define YYERROR -2 - -TGLexer::TGLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { +TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = CurBuf->getBufferStart(); + TokStart = 0; } -TGLexer::~TGLexer() { - while (!IncludeStack.empty()) { - delete IncludeStack.back().Buffer; - IncludeStack.pop_back(); - } - delete CurBuf; +SMLoc TGLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); } -std::ostream &TGLexer::err() { - PrintIncludeStack(*cerr.stream()); - return *cerr.stream(); +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return tgtok::Error. +tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { + PrintError(Loc, Msg); + return tgtok::Error; } -void TGLexer::PrintIncludeStack(std::ostream &OS) { - for (unsigned i = 0, e = IncludeStack.size(); i != e; ++i) - OS << "Included from " << IncludeStack[i].Buffer->getBufferIdentifier() - << ":" << IncludeStack[i].LineNo << ":\n"; - OS << "Parsing " << CurBuf->getBufferIdentifier() << ":" - << CurLineNo << ": "; +void TGLexer::PrintError(const char *Loc, const Twine &Msg) const { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error"); +} + +void TGLexer::PrintError(SMLoc Loc, const Twine &Msg) const { + SrcMgr.PrintMessage(Loc, Msg, "error"); } + int TGLexer::getNextChar() { char CurChar = *CurPtr++; switch (CurChar) { default: return (unsigned char)CurChar; - case 0: + case 0: { // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. if (CurPtr-1 != CurBuf->getBufferEnd()) @@ -65,18 +66,18 @@ int TGLexer::getNextChar() { // If this is the end of an included file, pop the parent file off the // include stack. - if (!IncludeStack.empty()) { - delete CurBuf; - CurBuf = IncludeStack.back().Buffer; - CurLineNo = IncludeStack.back().LineNo; - CurPtr = IncludeStack.back().CurPtr; - IncludeStack.pop_back(); + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); return getNextChar(); } // Otherwise, return end of file. --CurPtr; // Another call to lex will return EOF again. return EOF; + } case '\n': case '\r': // Handle the newline character by ignoring it and incrementing the line @@ -85,25 +86,38 @@ int TGLexer::getNextChar() { if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar) ++CurPtr; // Eat the two char newline sequence. - - ++CurLineNo; return '\n'; } } -int TGLexer::LexToken() { +tgtok::TokKind TGLexer::LexToken() { + TokStart = CurPtr; // This always consumes at least one character. int CurChar = getNextChar(); switch (CurChar) { default: - // Handle letters: [a-zA-Z_] - if (isalpha(CurChar) || CurChar == '_') + // Handle letters: [a-zA-Z_#] + if (isalpha(CurChar) || CurChar == '_' || CurChar == '#') return LexIdentifier(); - // Unknown character, return the char itself. - return (unsigned char)CurChar; - case EOF: return YYEOF; + // Unknown character, emit an error. + return ReturnError(TokStart, "Unexpected character"); + case EOF: return tgtok::Eof; + case ':': return tgtok::colon; + case ';': return tgtok::semi; + case '.': return tgtok::period; + case ',': return tgtok::comma; + case '<': return tgtok::less; + case '>': return tgtok::greater; + case ']': return tgtok::r_square; + case '{': return tgtok::l_brace; + case '}': return tgtok::r_brace; + case '(': return tgtok::l_paren; + case ')': return tgtok::r_paren; + case '=': return tgtok::equal; + case '?': return tgtok::question; + case 0: case ' ': case '\t': @@ -118,9 +132,9 @@ int TGLexer::LexToken() { SkipBCPLComment(); else if (*CurPtr == '*') { if (SkipCComment()) - return YYERROR; - } else // Otherwise, return this / as a token. - return CurChar; + return tgtok::Error; + } else // Otherwise, this is an error. + return ReturnError(TokStart, "Unexpected character"); return LexToken(); case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': @@ -134,30 +148,61 @@ int TGLexer::LexToken() { } /// LexString - Lex "[^"]*" -int TGLexer::LexString() { +tgtok::TokKind TGLexer::LexString() { const char *StrStart = CurPtr; + CurStrVal = ""; + while (*CurPtr != '"') { // If we hit the end of the buffer, report an error. - if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) { - TheError = "End of file in string literal"; - return YYERROR; - } else if (*CurPtr == '\n' || *CurPtr == '\r') { - TheError = "End of line in string literal"; - return YYERROR; - } + if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + if (*CurPtr == '\n' || *CurPtr == '\r') + return ReturnError(StrStart, "End of line in string literal"); + + if (*CurPtr != '\\') { + CurStrVal += *CurPtr++; + continue; + } + ++CurPtr; + + switch (*CurPtr) { + case '\\': case '\'': case '"': + // These turn into their literal character. + CurStrVal += *CurPtr++; + break; + case 't': + CurStrVal += '\t'; + ++CurPtr; + break; + case 'n': + CurStrVal += '\n'; + ++CurPtr; + break; + + case '\n': + case '\r': + return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); + + // If we hit the end of the buffer, report an error. + case '\0': + if (CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + // FALL THROUGH + default: + return ReturnError(CurPtr, "invalid escape in string literal"); + } } - Filelval.StrVal = new std::string(StrStart, CurPtr); ++CurPtr; - return STRVAL; + return tgtok::StrVal; } -int TGLexer::LexVarName() { +tgtok::TokKind TGLexer::LexVarName() { if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') - return '$'; // Invalid varname. + return ReturnError(TokStart, "Invalid variable name"); // Otherwise, we're ok, consume the rest of the characters. const char *VarNameStart = CurPtr++; @@ -165,81 +210,73 @@ int TGLexer::LexVarName() { while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') ++CurPtr; - Filelval.StrVal = new std::string(VarNameStart, CurPtr); - return VARNAME; + CurStrVal.assign(VarNameStart, CurPtr); + return tgtok::VarName; } -int TGLexer::LexIdentifier() { - // The first letter is [a-zA-Z_]. - const char *IdentStart = CurPtr-1; +tgtok::TokKind TGLexer::LexIdentifier() { + // The first letter is [a-zA-Z_#]. + const char *IdentStart = TokStart; - // Match the rest of the identifier regex: [0-9a-zA-Z_]* - while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') + // Match the rest of the identifier regex: [0-9a-zA-Z_#]* + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' || + *CurPtr == '#') ++CurPtr; + // Check to see if this identifier is a keyword. unsigned Len = CurPtr-IdentStart; - if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT; - if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT; - if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS; - if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING; - if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST; - if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE; - if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG; + if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int; + if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit; + if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits; + if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String; + if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List; + if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code; + if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag; - if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS; - if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF; - if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM; - if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS; - if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD; - if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET; - if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN; + if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class; + if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def; + if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm; + if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) + return tgtok::MultiClass; + if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field; + if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let; + if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In; if (Len == 7 && !memcmp(IdentStart, "include", 7)) { - if (LexInclude()) return YYERROR; - return LexToken(); + if (LexInclude()) return tgtok::Error; + return Lex(); } - Filelval.StrVal = new std::string(IdentStart, CurPtr); - return ID; + CurStrVal.assign(IdentStart, CurPtr); + return tgtok::Id; } /// LexInclude - We just read the "include" token. Get the string token that /// comes next and enter the include. bool TGLexer::LexInclude() { // The token after the include must be a string. - int Tok = LexToken(); - if (Tok == YYERROR) return true; - if (Tok != STRVAL) { - TheError = "Expected filename after include"; + tgtok::TokKind Tok = LexToken(); + if (Tok == tgtok::Error) return true; + if (Tok != tgtok::StrVal) { + PrintError(getLoc(), "Expected filename after include"); return true; } // Get the string. - std::string Filename = *Filelval.StrVal; - delete Filelval.StrVal; - - // Try to find the file. - MemoryBuffer *NewBuf = MemoryBuffer::getFile(&Filename[0], Filename.size()); + std::string Filename = CurStrVal; - // If the file didn't exist directly, see if it's in an include path. - for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { - std::string IncFile = IncludeDirectories[i] + "/" + Filename; - NewBuf = MemoryBuffer::getFile(&IncFile[0], IncFile.size()); - } - - if (NewBuf == 0) { - TheError = "Could not find include file '" + Filename + "'"; + + CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr)); + if (CurBuffer == -1) { + PrintError(getLoc(), "Could not find include file '" + Filename + "'"); return true; } // Save the line number and lex buffer of the includer. - IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo)); - - CurLineNo = 1; // Reset line numbering. - CurBuf = NewBuf; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = CurBuf->getBufferStart(); return false; } @@ -272,7 +309,7 @@ bool TGLexer::SkipCComment() { int CurChar = getNextChar(); switch (CurChar) { case EOF: - TheError = "Unterminated comment!"; + PrintError(TokStart, "Unterminated comment!"); return true; case '*': // End of the comment? @@ -296,54 +333,64 @@ bool TGLexer::SkipCComment() { /// [-+]?[0-9]+ /// 0x[0-9a-fA-F]+ /// 0b[01]+ -int TGLexer::LexNumber() { - const char *NumStart = CurPtr-1; - +tgtok::TokKind TGLexer::LexNumber() { if (CurPtr[-1] == '0') { if (CurPtr[0] == 'x') { ++CurPtr; - NumStart = CurPtr; + const char *NumStart = CurPtr; while (isxdigit(CurPtr[0])) ++CurPtr; - if (CurPtr == NumStart) { - TheError = "Invalid hexadecimal number"; - return YYERROR; + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(TokStart, "Hexadecimal number out of range"); } - Filelval.IntVal = strtoll(NumStart, 0, 16); - return INTVAL; + return tgtok::IntVal; } else if (CurPtr[0] == 'b') { ++CurPtr; - NumStart = CurPtr; + const char *NumStart = CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; - - if (CurPtr == NumStart) { - TheError = "Invalid binary number"; - return YYERROR; - } - Filelval.IntVal = strtoll(NumStart, 0, 2); - return INTVAL; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return tgtok::IntVal; } } // Check for a sign without a digit. - if (CurPtr[-1] == '-' || CurPtr[-1] == '+') { - if (!isdigit(CurPtr[0])) - return CurPtr[-1]; + if (!isdigit(CurPtr[0])) { + if (CurPtr[-1] == '-') + return tgtok::minus; + else if (CurPtr[-1] == '+') + return tgtok::plus; } while (isdigit(CurPtr[0])) ++CurPtr; - Filelval.IntVal = strtoll(NumStart, 0, 10); - return INTVAL; + CurIntVal = strtoll(TokStart, 0, 10); + return tgtok::IntVal; } /// LexBracket - We just read '['. If this is a code block, return it, /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' -int TGLexer::LexBracket() { +tgtok::TokKind TGLexer::LexBracket() { if (CurPtr[0] != '{') - return '['; + return tgtok::l_square; ++CurPtr; const char *CodeStart = CurPtr; while (1) { @@ -355,85 +402,41 @@ int TGLexer::LexBracket() { Char = getNextChar(); if (Char == EOF) break; if (Char == ']') { - Filelval.StrVal = new std::string(CodeStart, CurPtr-2); - return CODEFRAGMENT; + CurStrVal.assign(CodeStart, CurPtr-2); + return tgtok::CodeFragment; } } - TheError = "Invalid Code Block"; - return YYERROR; + return ReturnError(CodeStart-2, "Unterminated Code Block"); } /// LexExclaim - Lex '!' and '![a-zA-Z]+'. -int TGLexer::LexExclaim() { +tgtok::TokKind TGLexer::LexExclaim() { if (!isalpha(*CurPtr)) - return '!'; + return ReturnError(CurPtr - 1, "Invalid \"!operator\""); const char *Start = CurPtr++; while (isalpha(*CurPtr)) ++CurPtr; // Check to see which operator this is. - unsigned Len = CurPtr-Start; - - if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK; - if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK; - if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK; - if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK; - if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK; - - TheError = "Unknown operator"; - return YYERROR; + tgtok::TokKind Kind = + StringSwitch(StringRef(Start, CurPtr - Start)) + .Case("eq", tgtok::XEq) + .Case("if", tgtok::XIf) + .Case("head", tgtok::XHead) + .Case("tail", tgtok::XTail) + .Case("con", tgtok::XConcat) + .Case("shl", tgtok::XSHL) + .Case("sra", tgtok::XSRA) + .Case("srl", tgtok::XSRL) + .Case("cast", tgtok::XCast) + .Case("empty", tgtok::XEmpty) + .Case("subst", tgtok::XSubst) + .Case("foreach", tgtok::XForEach) + .Case("strconcat", tgtok::XStrConcat) + .Default(tgtok::Error); + + return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); } -//===----------------------------------------------------------------------===// -// Interfaces used by the Bison parser. -//===----------------------------------------------------------------------===// - -int Fileparse(); -static TGLexer *TheLexer; - -namespace llvm { - -std::ostream &err() { - return TheLexer->err(); -} - -/// ParseFile - this function begins the parsing of the specified tablegen -/// file. -/// -void ParseFile(const std::string &Filename, - const std::vector &IncludeDirs) { - std::string ErrorStr; - MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(), - &ErrorStr); - if (F == 0) { - cerr << "Could not open input file '" + Filename + "': " << ErrorStr <<"\n"; - exit(1); - } - - assert(!TheLexer && "Lexer isn't reentrant yet!"); - TheLexer = new TGLexer(F); - - // Record the location of the include directory so that the lexer can find - // it later. - TheLexer->setIncludeDirs(IncludeDirs); - - Fileparse(); - - // Cleanup - delete TheLexer; - TheLexer = 0; -} -} // End llvm namespace - - -int Filelex() { - assert(TheLexer && "No lexer setup yet!"); - int Tok = TheLexer->LexToken(); - if (Tok == YYERROR) { - err() << TheLexer->getError() << "\n"; - exit(1); - } - return Tok; -}