From d05b93515d19f2462909676db3e32ea08a6e359f Mon Sep 17 00:00:00 2001 From: David Peixotto Date: Fri, 6 Dec 2013 20:35:58 +0000 Subject: [PATCH] Integrated assembler incorrectly lexes ARM-style comments The integrated assembler fails to properly lex arm comments when they are adjacent to an identifier in the input stream. The reason is that the arm comment symbol '@' is also used as symbol variant in other assembly languages so when lexing an identifier it allows the '@' symbol as part of the identifier. Example: $ cat comment.s foo: add r0, r0@got to parse this as a comment $ llvm-mc -triple armv7 comment.s comment.s:4:18: error: unexpected token in argument list add r0, r0@got to parse this as a comment ^ This should be parsed correctly as `add r0, r0`. This commit modifies the assembly lexer to not include the '@' symbol in identifiers when lexing for targets that use '@' for comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 11 +++++++---- test/MC/ARM/comment.s | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 test/MC/ARM/comment.s diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index b49dd010479..a066e648303 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -139,20 +139,23 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { } /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* -static bool IsIdentifierChar(char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?'; +static bool IsIdentifierChar(char c, bool AllowAt) { + return isalnum(c) || c == '_' || c == '$' || c == '.' || + (c == '@' && AllowAt) || c == '?'; } AsmToken AsmLexer::LexIdentifier() { + bool AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); // Check for floating point literals. if (CurPtr[-1] == '.' 
&& isdigit(*CurPtr)) { // Disambiguate a .1243foo identifier from a floating literal. while (isdigit(*CurPtr)) ++CurPtr; - if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) + if (*CurPtr == 'e' || *CurPtr == 'E' || + !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) return LexFloatLiteral(); } - while (IsIdentifierChar(*CurPtr)) + while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) ++CurPtr; // Handle . as a special case. diff --git a/test/MC/ARM/comment.s b/test/MC/ARM/comment.s new file mode 100644 index 00000000000..e95f313aca2 --- /dev/null +++ b/test/MC/ARM/comment.s @@ -0,0 +1,24 @@ +@ Tests to check that '@' does not get lexed as an identifier for arm +@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck %s +@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s --check-prefix=ERROR + +foo: + bl boo@plt should be ignored + bl goo@plt + .long bar@got to parse this as a comment + .long baz@got + add r0, r0@ignore this extra junk + +@CHECK-LABEL: foo: +@CHECK: bl boo +@CHECK-NOT: @ +@CHECK: bl goo +@CHECK-NOT: @ +@CHECK: .long bar +@CHECK-NOT: @ +@CHECK: .long baz +@CHECK-NOT: @ +@CHECK: add r0, r0 +@CHECK-NOT: @ + +@ERROR-NOT: error: -- 2.34.1