1 //===- TGPreprocessor.cpp - Preprocessor for TableGen ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implement the Preprocessor for TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "TGPreprocessor.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 #include "llvm/Support/SourceMgr.h"
18 #include "llvm/Support/ToolOutputFile.h"
19 #include "llvm/TableGen/Error.h"
// Maps a preprocessor variable name to the text that is substituted for it
// when a variable record is evaluated.
28 typedef std::map<std::string, std::string> TGPPEnvironment;
// Forward declarations of the token-matching helpers defined later in this
// file. Each one receives the token kind together with the
// [BeginOfToken, EndOfToken) character span produced by the lexer.
49 bool MatchSymbol(TGPPTokenKind Kind,
50 const char *BeginOfToken, const char *EndOfToken,
53 bool MatchSymbol(TGPPTokenKind Kind,
54 const char *BeginOfToken, const char *EndOfToken,
57 bool MatchIdNum(TGPPTokenKind Kind,
58 const char *BeginOfToken, const char *EndOfToken);
60 bool MatchIdentifier(TGPPTokenKind Kind,
61 const char *BeginOfToken, const char *EndOfToken);
63 bool MatchNumber(TGPPTokenKind Kind,
64 const char *BeginOfToken, const char *EndOfToken,
// Buffer being lexed; set once by the constructor.
68 const MemoryBuffer *CurBuf;
// Lexer state flags that NextToken() reads and updates between calls.
70 bool IsInsideMacroStatement, WasEndOfLine;
// Returns true only when Ptr points at a NUL byte that sits exactly at the
// end of the current buffer; a NUL anywhere else is not treated as the end.
72 bool IsEndOfBuffer(const char *Ptr) const {
73 return (!*Ptr && Ptr == CurBuf->getBufferEnd());
// CurPtr is at a line terminator character ('\r' or '\n').
77 if (*CurPtr == '\r' || *CurPtr == '\n') {
// A second, different terminator right behind it forms a two-character line
// ending ("\r\n" or "\n\r"); two identical characters are two separate lines.
// NOTE(review): the statements executed for each case are not visible here.
78 if ((CurPtr[1] == '\r' || CurPtr[1] == '\n') && CurPtr[0] != CurPtr[1])
// Returns true if the text at Ptr starts with Prefix, ignoring leading blanks.
// The loop tests for spaces and tabs in front of the text (its body is not
// visible in this view -- presumably it advances Ptr).
85 bool MatchPrefix(const char *Prefix, const char *Ptr) const {
86 while (*Ptr == ' ' || *Ptr == '\t')
88 return !strncmp(Prefix, Ptr, strlen(Prefix));
// Creates a lexer positioned at the start of buffer 0 of the given source
// manager, outside of any macro statement.
91 TGPPLexer(const SourceMgr &SM)
92 : CurBuf(SM.getMemoryBuffer(0)),
93 CurPtr(CurBuf->getBufferStart()),
94 IsInsideMacroStatement(false),
// Scans the next token. On success *BeginOfToken / *EndOfToken receive the
// token's character span; the return value is the token kind
// (tgpptoken_end at end of buffer, tgpptoken_error after a diagnostic).
98 TGPPTokenKind NextToken(const char **BeginOfToken, const char **EndOfToken);
101 // preprocessor records
// State used by for-loop records: the index variable names and their ranges
// (parallel vectors, filled pairwise by AppendIndex) and the records that
// make up the loop body.
106 std::vector<std::string> IndexVars;
107 std::vector<TGPPRange> IndexRanges;
108 TGPPRecords LoopBody;
110 // tgpprecord_variable, tgpprecord_literal
// Expands a for-loop record into OS; defined out of line.
113 bool EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
// Looks up Str in Env and writes the bound value to OS. An unbound variable
// is reported through PrintError.
115 bool EvaluateVariable(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
116 TGPPEnvironment::const_iterator it_val = Env.find(Str);
117 if (it_val == Env.end()) {
118 PrintError("Var is not bound to any value: " + Str);
121 OS << it_val->second;
// Evaluates a literal record.
// NOTE(review): the body is not visible in this view -- presumably it writes
// Str to OS unchanged; confirm against the full file.
125 bool EvaluateLiteral(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
// Creates a record of kind K with no text.
131 TGPPRecord(TGPPRecordKind K) : Kind(K) {}
// Creates a record of kind K carrying the text S (a variable name or the
// literal text, depending on the kind).
132 TGPPRecord(TGPPRecordKind K, const std::string &S) : Kind(K), Str(S) {}
// Gives mutable access to the loop body so the parser can append records.
134 TGPPRecords *GetLoopBody() { return &LoopBody; }
// Registers one loop index: variable name V iterating over range R. Both
// vectors grow together, so entry i of IndexVars pairs with entry i of
// IndexRanges.
136 void AppendIndex(const std::string &V, const TGPPRange &R) {
137 IndexVars.push_back(V);
138 IndexRanges.push_back(R);
// Evaluates this record in environment Env, writing the expansion to OS;
// defined out of line.
141 bool Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
// Explicit values of a list range.
148 std::vector<std::string> Vals;
150 // tgpprange_sequence
// Creates a list range (values are added with push_back).
154 TGPPRange() : Kind(tgpprange_list) {}
// Creates a numeric sequence range with bounds F and T.
155 TGPPRange(long int F, long int T)
156 : Kind(tgpprange_sequence), From(F), To(T) {}
158 size_t size() const {
159 if (Kind == tgpprange_list)
162 return To - From + 1;
// Returns the i-th value of the range as a string. For a sequence range the
// value From + i is formatted through a raw_string_ostream.
165 std::string at(size_t i) const {
166 if (Kind == tgpprange_list)
170 raw_string_ostream Tmp(Result);
171 Tmp << (From + (long int)i);
// Adds S to the range; the kind check shows that only list ranges are
// affected. NOTE(review): the statement under the check is not visible in
// this view -- presumably it appends S to Vals.
176 void push_back(const std::string &S) {
177 if (Kind == tgpprange_list)
183 using namespace llvm;
/// Returns true if the token is a symbol token exactly one character long and
/// that character equals Symbol.
185 bool llvm::MatchSymbol(TGPPTokenKind Kind,
186 const char *BeginOfToken, const char *EndOfToken,
188 return Kind == tgpptoken_symbol &&
189 BeginOfToken + 1 == EndOfToken &&
190 Symbol == *BeginOfToken;
193 bool llvm::MatchSymbol(TGPPTokenKind Kind,
194 const char *BeginOfToken, const char *EndOfToken,
195 const char *Symbol) {
196 return Kind == tgpptoken_symbol &&
197 BeginOfToken + strlen(Symbol) == EndOfToken &&
198 !strncmp(Symbol, BeginOfToken, EndOfToken - BeginOfToken);
201 bool llvm::MatchIdNum(TGPPTokenKind Kind,
202 const char *BeginOfToken, const char *EndOfToken) {
203 if (Kind != tgpptoken_symbol)
205 for (const char *i = BeginOfToken; i != EndOfToken; ++i)
206 if (*i != '_' && !isalnum(*i))
211 bool llvm::MatchIdentifier(TGPPTokenKind Kind,
212 const char *BeginOfToken, const char *EndOfToken) {
213 if (Kind != tgpptoken_symbol)
216 const char *i = BeginOfToken;
217 if (*i != '_' && !isalpha(*i))
219 for (++i; i != EndOfToken; ++i)
220 if (*i != '_' && !isalnum(*i))
/// Parses a symbol token as a base-10 integer with strtol and stores the
/// result in *Val. The match succeeds only if strtol stopped exactly at
/// EndOfToken, i.e. the whole token was consumed.
/// NOTE(review): strtol range errors (errno == ERANGE) are not checked.
226 bool llvm::MatchNumber(TGPPTokenKind Kind,
227 const char *BeginOfToken, const char *EndOfToken,
229 if (Kind != tgpptoken_symbol)
232 *Val = strtol(BeginOfToken, &e, 10);
233 return e == EndOfToken;
/// Scans the next token starting at CurPtr and reports its character span
/// through BeginOfToken / EndOfToken.
///
/// Three lexing situations are distinguished below: inside a macro statement
/// (symbols and a terminating newline), at a '#' (macro keyword, the #"#
/// escape, or a #var# reference), and plain text (returned as one literal).
/// NOTE(review): the <cctype> calls below receive plain char values; that is
/// undefined for negative chars if char is signed.
236 TGPPTokenKind TGPPLexer::
237 NextToken(const char **BeginOfToken, const char **EndOfToken) {
// Remember whether the previous call ended a line, then reset the flag.
238 bool IsBeginOfLine = WasEndOfLine;
239 WasEndOfLine = false;
// Nothing left to lex.
241 if (IsEndOfBuffer(CurPtr))
242 return tgpptoken_end;
// Macro statement mode: split the rest of the line into symbol tokens.
244 else if (IsInsideMacroStatement) {
245 while (*CurPtr == ' ' || *CurPtr == '\t') // trim space, if any
248 const char *BeginOfSymbol = CurPtr;
// Leaving macro statement mode and reporting a newline token.
// NOTE(review): the guarding condition is not visible in this view.
252 IsInsideMacroStatement = false;
254 return tgpptoken_newline;
// Single-character punctuation.
257 else if (*CurPtr == '[' || *CurPtr == ']' ||
258 *CurPtr == '(' || *CurPtr == ')' ||
259 *CurPtr == ',' || *CurPtr == '=') {
260 *BeginOfToken = BeginOfSymbol;
261 *EndOfToken = ++CurPtr;
262 return tgpptoken_symbol;
// Identifier: [_A-Za-z][_A-Za-z0-9]*.
265 else if (*CurPtr == '_' || isalpha(*CurPtr)) {
267 while (*CurPtr == '_' || isalnum(*CurPtr))
269 *BeginOfToken = BeginOfSymbol;
270 *EndOfToken = CurPtr;
271 return tgpptoken_symbol;
// Number: starts with a sign or a digit, followed by digits.
274 else if (*CurPtr == '+' || *CurPtr == '-' || isdigit(*CurPtr)) {
276 while (isdigit(*CurPtr))
278 *BeginOfToken = BeginOfSymbol;
279 *EndOfToken = CurPtr;
280 return tgpptoken_symbol;
// Any other character is not allowed inside a macro statement.
284 PrintError(BeginOfSymbol, "Unrecognizable token");
285 return tgpptoken_error;
// '#' introduces a macro keyword, the quote escape, or a variable reference.
289 else if (*CurPtr == '#') {
// "#for" / "#end" (blanks allowed after the '#'): switch to macro statement
// mode and lex the keyword itself through the branch above.
// NOTE(review): the first operand of this condition is not visible here.
291 (MatchPrefix("for", CurPtr + 1) ||
292 MatchPrefix("end", CurPtr + 1))) {
294 IsInsideMacroStatement = true;
295 return NextToken(BeginOfToken, EndOfToken);
298 // the special token #"# is translated to a literal "
299 else if (CurPtr[1] == '"' && CurPtr[2] == '#') {
300 *BeginOfToken = ++CurPtr;
301 *EndOfToken = ++CurPtr;
303 return tgpptoken_literal;
// Otherwise this must be a variable reference of the form #name#; the token
// span covers only the name, without the surrounding '#' characters.
307 const char *BeginOfVar = ++CurPtr; // trim '#'
308 if (*CurPtr != '_' && !isalpha(*CurPtr)) {
309 PrintError(BeginOfVar, "Variable must start with [_A-Za-z]: ");
310 return tgpptoken_error;
312 while (*CurPtr == '_' || isalnum(*CurPtr))
314 if (*CurPtr != '#') {
315 PrintError(BeginOfVar, "Variable must end with #");
316 return tgpptoken_error;
318 *BeginOfToken = BeginOfVar;
319 *EndOfToken = CurPtr++; // trim '#'
320 return tgpptoken_symbol;
// Plain text: scan forward and return everything up to the stopping point as
// one literal token. The scan tracks block comments (with a nesting level),
// line comments and string literals; those states are tested before the '#'
// test at the bottom of the chain.
324 const char *BeginOfLiteral = CurPtr;
325 int CCommentLevel = 0;
326 bool BCPLComment = false;
327 bool StringLiteral = false;
328 for (; !IsEndOfBuffer(CurPtr); ++CurPtr) {
// Inside a block comment: watch for nested openers, closers and newlines.
329 if (CCommentLevel > 0) {
330 if (CurPtr[0] == '/' && CurPtr[1] == '*') {
333 } else if (CurPtr[0] == '*' && CurPtr[1] == '/') {
336 } else if (IsNewLine())
// Inside a '//' comment.
340 else if (BCPLComment) {
// Inside a string literal.
347 else if (StringLiteral) {
348 // no string escape sequence in TableGen?
350 StringLiteral = false;
// Start of a block comment.
353 else if (CurPtr[0] == '/' && CurPtr[1] == '*') {
// Start of a line comment.
358 else if (CurPtr[0] == '/' && CurPtr[1] == '/') {
// Start of a string literal.
363 else if (*CurPtr == '"')
364 StringLiteral = true;
366 else if (IsNewLine()) {
// A '#' outside comments and strings.
// NOTE(review): the statement executed here is not visible in this view --
// presumably it ends the literal so the '#' is lexed by the next call.
372 else if (*CurPtr == '#')
376 *BeginOfToken = BeginOfLiteral;
377 *EndOfToken = CurPtr;
378 return tgpptoken_literal;
382 EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
383 std::vector<TGPPRange>::const_iterator ri, re;
385 // calculate the min size
386 ri = IndexRanges.begin();
387 re = IndexRanges.begin();
388 size_t n = ri->size();
389 for (; ri != re; ++ri) {
390 size_t m = ri->size();
395 for (size_t which_val = 0; which_val < n; ++which_val) {
396 // construct nested environment
397 TGPPEnvironment NestedEnv(Env);
398 std::vector<std::string>::const_iterator vi = IndexVars.begin();
399 for (ri = IndexRanges.begin(), re = IndexRanges.end();
400 ri != re; ++vi, ++ri) {
401 NestedEnv.insert(std::make_pair(*vi, ri->at(which_val)));
404 for (TGPPRecords::const_iterator i = LoopBody.begin(), e = LoopBody.end();
406 if (i->Evaluate(NestedEnv, OS))
414 Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
417 return EvaluateFor(Env, OS);
418 case tgpprecord_variable:
419 return EvaluateVariable(Env, OS);
420 case tgpprecord_literal:
421 return EvaluateLiteral(Env, OS);
423 PrintError("Unknown kind of record: " + Kind);
/// Reads tokens from the lexer and appends the corresponding records to
/// CurRecords. TopLevel tells whether this is the file-level block or a block
/// nested in a loop (ParseForLoop passes false).
/// NOTE(review): the loop's terminating token kind and the return statements
/// are not visible in this view.
429 bool TGPreprocessor::ParseBlock(bool TopLevel) {
431 const char *BeginOfToken, *EndOfToken;
432 while ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
434 std::string Symbol(BeginOfToken, EndOfToken);
436 case tgpptoken_symbol:
// "for" starts a loop.
437 if (Symbol == "for") {
// "end" closes the current block. It is diagnosed when there is no block to
// close, and the token following it is checked so that trailing tokens after
// "#end" are diagnosed.
440 } else if (Symbol == "end") {
442 PrintError(BeginOfToken, "No block to end here");
445 if ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
447 PrintError(BeginOfToken, "Tokens after #end");
451 } else if (Symbol == "NAME") {
452 // treat '#NAME#' as a literal
453 CurRecords->push_back(
454 TGPPRecord(tgpprecord_literal,
455 std::string("#NAME#")));
// Any other symbol is a variable reference to be resolved at evaluation.
457 CurRecords->push_back(
458 TGPPRecord(tgpprecord_variable,
459 std::string(BeginOfToken, EndOfToken)));
// Plain text is recorded unchanged.
462 case tgpptoken_literal:
463 CurRecords->push_back(
464 TGPPRecord(tgpprecord_literal,
465 std::string(BeginOfToken, EndOfToken)));
/// Parses the header of a for statement -- one or more "Var = Range" items
/// separated by commas and ended by a newline -- followed by the loop body,
/// and appends the resulting for-loop record to the enclosing record list.
/// NOTE(review): the construct that repeats the header parsing is not visible
/// in this view.
474 bool TGPreprocessor::ParseForLoop() {
475 TGPPRecord ForLoopRecord(tgpprecord_for);
479 const char *BeginOfToken, *EndOfToken;
// Index variable name.
481 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
482 if (!MatchIdentifier(Kind, BeginOfToken, EndOfToken)) {
483 PrintError(BeginOfToken, "Not an identifier");
486 std::string IndexVar(BeginOfToken, EndOfToken);
// '=' between the variable and its range.
488 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
489 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '=')) {
490 PrintError(BeginOfToken, "Need a '=' here");
// The range the variable iterates over.
495 if (ParseRange(&Range))
497 ForLoopRecord.AppendIndex(IndexVar, Range);
// A newline ends the header; otherwise a ',' must introduce the next index.
499 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
500 if (Kind == tgpptoken_newline)
502 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
503 PrintError(BeginOfToken, "Need a ',' here");
// Parse the body into the loop record, then restore the enclosing record
// list and append the finished loop to it.
509 TGPPRecords *LastCurRecords = CurRecords;
510 CurRecords = ForLoopRecord.GetLoopBody();
512 if (ParseBlock(false))
515 CurRecords = LastCurRecords;
516 CurRecords->push_back(ForLoopRecord);
/// Parses a loop index range into *Range. Two forms are accepted:
///   [a, b, ...]          a list of identifiers and/or numbers
///   sequence(from, to)   a numeric sequence
/// Anything else is reported as an illegal range.
520 bool TGPreprocessor::ParseRange(TGPPRange *Range) {
522 const char *BeginOfToken, *EndOfToken;
524 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
// List form: '[' item { ',' item } ']'.
526 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, '[')) {
528 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
529 if (!MatchIdNum(Kind, BeginOfToken, EndOfToken)) {
530 PrintError(BeginOfToken, "Need a identifier or a number here");
533 Range->push_back(std::string(BeginOfToken, EndOfToken));
// After an item: ']' closes the list, ',' continues it.
535 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
536 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, ']'))
538 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
539 PrintError(BeginOfToken, "Need a comma here");
// Sequence form: "sequence" '(' number ',' number ')'.
546 else if (MatchSymbol(Kind, BeginOfToken, EndOfToken, "sequence")) {
549 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
550 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '(')) {
551 PrintError(BeginOfToken, "Need a left parentheses here");
555 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
556 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &from)) {
557 PrintError(BeginOfToken, "Not a number");
561 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
562 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
563 PrintError(BeginOfToken, "Need a comma here");
567 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
568 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &to)) {
569 PrintError(BeginOfToken, "Not a number");
573 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
574 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ')')) {
575 PrintError(BeginOfToken, "Need a right parentheses here");
// Replace the default (list) range with the parsed sequence.
579 *Range = TGPPRange(from, to);
583 PrintError(BeginOfToken, "illegal range of loop index");
587 bool TGPreprocessor::PreprocessFile() {
588 TGPPLexer TheLexer(SrcMgr);
589 TGPPRecords TopLevelRecords;
592 CurRecords = &TopLevelRecords;
593 if (ParseBlock(true))
597 for (TGPPRecords::const_iterator i = TopLevelRecords.begin(),
598 e = TopLevelRecords.end();
600 if (i->Evaluate(Env, Out.os()))