ARM label operands can be quoted.

[oota-llvm.git] / lib / TableGen / TGLexer.cpp
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp

index 0dc1c70136b5e154fab0c89b610f139882cf3c31..45d0b1ed2b046546c3af000fd53489112b6cf04d 100644 (file)
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -80,6 +80,10 @@ int TGLexer::getNextChar() {
    }  
  }
  
+int TGLexer::peekNextChar(int Index) {
+  return *(CurPtr + Index);
+}
+
  tgtok::TokKind TGLexer::LexToken() {
    TokStart = CurPtr;
    // This always consumes at least one character.
@@ -87,10 +91,10 @@ tgtok::TokKind TGLexer::LexToken() {
  
    switch (CurChar) {
    default:
-    // Handle letters: [a-zA-Z_#]
-    if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+    // Handle letters: [a-zA-Z_]
+    if (isalpha(CurChar) || CurChar == '_')
        return LexIdentifier();
-      
+
      // Unknown character, emit an error.
      return ReturnError(TokStart, "Unexpected character");
    case EOF: return tgtok::Eof;
@@ -107,6 +111,7 @@ tgtok::TokKind TGLexer::LexToken() {
    case ')': return tgtok::r_paren;
    case '=': return tgtok::equal;
    case '?': return tgtok::question;
+  case '#': return tgtok::paste;
        
    case 0:
    case ' ':
@@ -128,8 +133,44 @@ tgtok::TokKind TGLexer::LexToken() {
      return LexToken();
    case '-': case '+':
    case '0': case '1': case '2': case '3': case '4': case '5': case '6':
-  case '7': case '8': case '9':  
+  case '7': case '8': case '9': {
+    int NextChar = 0;
+    if (isdigit(CurChar)) {
+      // Allow identifiers to start with a number if it is followed by
+      // an identifier.  This can happen with paste operations like
+      // foo#8i.
+      int i = 0;
+      do {
+        NextChar = peekNextChar(i++);
+      } while (isdigit(NextChar));
+
+      if (NextChar == 'x' || NextChar == 'b') {
+        // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f] this is most
+        // likely a number.
+        int NextNextChar = peekNextChar(i);
+        switch (NextNextChar) {
+        default:
+          break;
+        case '0': case '1': 
+          if (NextChar == 'b')
+            return LexNumber();
+          // Fallthrough
+        case '2': case '3': case '4': case '5':
+        case '6': case '7': case '8': case '9':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+          if (NextChar == 'x')
+            return LexNumber();
+          break;
+        }
+      }
+    }
+
+    if (isalpha(NextChar) || NextChar == '_')
+      return LexIdentifier();
+
      return LexNumber();
+  }
    case '"': return LexString();
    case '$': return LexVarName();
    case '[': return LexBracket();
@@ -208,40 +249,39 @@ tgtok::TokKind TGLexer::LexVarName() {
  tgtok::TokKind TGLexer::LexIdentifier() {
    // The first letter is [a-zA-Z_#].
    const char *IdentStart = TokStart;
-  
+
    // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
-  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
-         *CurPtr == '#')
+  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
      ++CurPtr;
-  
-  
+
    // Check to see if this identifier is a keyword.
-  unsigned Len = CurPtr-IdentStart;
-  
-  if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int;
-  if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit;
-  if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits;
-  if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String;
-  if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List;
-  if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code;
-  if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag;
-  
-  if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
-  if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
-  if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
-  if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
-    return tgtok::MultiClass;
-  if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field;
-  if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let;
-  if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In;
-  
-  if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
+  StringRef Str(IdentStart, CurPtr-IdentStart);
+
+  if (Str == "include") {
      if (LexInclude()) return tgtok::Error;
      return Lex();
    }
-    
-  CurStrVal.assign(IdentStart, CurPtr);
-  return tgtok::Id;
+
+  tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
+    .Case("int", tgtok::Int)
+    .Case("bit", tgtok::Bit)
+    .Case("bits", tgtok::Bits)
+    .Case("string", tgtok::String)
+    .Case("list", tgtok::List)
+    .Case("code", tgtok::Code)
+    .Case("dag", tgtok::Dag)
+    .Case("class", tgtok::Class)
+    .Case("def", tgtok::Def)
+    .Case("defm", tgtok::Defm)
+    .Case("multiclass", tgtok::MultiClass)
+    .Case("field", tgtok::Field)
+    .Case("let", tgtok::Let)
+    .Case("in", tgtok::In)
+    .Default(tgtok::Id);
+
+  if (Kind == tgtok::Id)
+    CurStrVal.assign(Str.begin(), Str.end());
+  return Kind;
  }
  
  /// LexInclude - We just read the "include" token.  Get the string token that