CurBuf = NULL;
CurPtr = NULL;
isAtStartOfLine = true;
+ AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
AsmLexer::~AsmLexer() {
StringRef(TokStart, CurPtr - TokStart));
}
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
-static bool IsIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9]+
+/// while making sure there are enough actual digits around for the constant to
+/// be valid. Note that the exponent is written in decimal, not hex.
+///
+/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
+/// before we get here.
+///
+/// NoIntDigits is true when no hex digits were seen in the integer part; in
+/// that case the fractional part must supply at least one digit.
+///
+/// Returns an AsmToken::Real token covering [TokStart, CurPtr) on success, or
+/// the result of ReturnError(TokStart, ...) when the significand has no
+/// digits, the 'p'/'P' exponent marker is missing, or the exponent has no
+/// digits.
+AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
+  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
+         "unexpected parse state in floating hex");
+  bool NoFracDigits = true;
+
+  // Skip the fractional part if there is one
+  if (*CurPtr == '.') {
+    ++CurPtr;
+
+    const char *FracStart = CurPtr;
+    // Cast to unsigned char: passing a negative char value to the <cctype>
+    // classifiers is undefined behaviour.
+    while (isxdigit(static_cast<unsigned char>(*CurPtr)))
+      ++CurPtr;
+
+    NoFracDigits = CurPtr == FracStart;
+  }
+
+  if (NoIntDigits && NoFracDigits)
+    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+                                 "expected at least one significand digit");
+
+  // Make sure we do have some kind of proper exponent part
+  if (*CurPtr != 'p' && *CurPtr != 'P')
+    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+                                 "expected exponent part 'p'");
+  ++CurPtr;
+
+  // Optional exponent sign.
+  if (*CurPtr == '+' || *CurPtr == '-')
+    ++CurPtr;
+
+  // N.b. exponent digits are *not* hex
+  const char *ExpStart = CurPtr;
+  while (isdigit(static_cast<unsigned char>(*CurPtr)))
+    ++CurPtr;
+
+  if (CurPtr == ExpStart)
+    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+                                 "expected at least one exponent digit");
+
+  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
+///
+/// IsIdentifierChar returns true if c may continue an identifier: an
+/// alphanumeric character, '_', '$', '.', '?', or '@' when AllowAt is true.
+static bool IsIdentifierChar(char c, bool AllowAt) {
+  // Cast to unsigned char: passing a negative char value to isalnum is
+  // undefined behaviour.
+  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
+         c == '.' || (c == '@' && AllowAt) || c == '?';
}
AsmToken AsmLexer::LexIdentifier() {
// Check for floating point literals.
// Disambiguate a .1243foo identifier from a floating literal.
while (isdigit(*CurPtr))
++CurPtr;
- if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ if (*CurPtr == 'e' || *CurPtr == 'E' ||
+ !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
return LexFloatLiteral();
}
- while (IsIdentifierChar(*CurPtr))
+ while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
++CurPtr;
// Handle . as a special case.
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
- if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
- CurPtr += 2;
- if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
- CurPtr += 3;
+ // Skip ULL, UL, U, L and LL suffixes: an optional 'U', then up to two 'L's.
+ if (CurPtr[0] == 'U')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
}
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
}
}
bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
- bool isBinary = LookAhead[-1] == 'b' || LookAhead[-1] == 'B';
- CurPtr = (isBinary || isHex || !FirstHex) ? LookAhead : FirstHex;
+ CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
if (isHex)
return 16;
- if (isBinary) {
- --CurPtr;
- return 2;
- }
return DefaultRadix;
}
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
-/// Forward/Backward Label: [0-9]+f or [0-9]b
-/// Binary integer: 0b[01]+ or [01][bB]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
/// Octal integer: 0[0-7]+
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
-
- // Backward Label: [0-9]b
- if (*CurPtr == 'b') {
- // See if we actually have "0b" as part of something like "jmp 0b\n"
- if (!isdigit(CurPtr[1])) {
- long long Value;
- StringRef Result(TokStart, CurPtr - TokStart);
- if (Result.getAsInteger(10, Value))
- return ReturnError(TokStart, "invalid backward label");
-
- return AsmToken(AsmToken::Integer, Result, Value);
- }
- }
-
- // Binary integer: 1[01]*[bB]
// Decimal integer: [1-9][0-9]*
- // Hexidecimal integer: [1-9][0-9a-fA-F]*[hH]
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doLookAhead(CurPtr, 10);
- bool isDecimal = Radix == 10;
-
+ bool isHex = Radix == 16;
// Check for floating point literals.
- if (isDecimal && (*CurPtr == '.' || *CurPtr == 'e')) {
+ if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
++CurPtr;
return LexFloatLiteral();
}
// integer, but that do fit in an unsigned one, we just convert them over.
unsigned long long UValue;
if (Result.getAsInteger(Radix, UValue))
- return ReturnError(TokStart, isDecimal ? "invalid decimal number" :
+ return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexdecimal number");
Value = (long long)UValue;
}
if (Radix == 2 || Radix == 16)
++CurPtr;
- // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
- // suffixes on integer literals.
+ // The darwin/x86 (and x86-64) assembler accepts and ignores type
+ // suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
return AsmToken(AsmToken::Integer, Result, Value);
}
- // Binary integer: 0b[01]+
if (*CurPtr == 'b') {
- const char *NumStart = ++CurPtr;
+ ++CurPtr;
+ // See if we actually have "0b" as part of something like "jmp 0b\n"
+ if (!isdigit(CurPtr[0])) {
+ --CurPtr;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ return AsmToken(AsmToken::Integer, Result, 0);
+ }
+ const char *NumStart = CurPtr;
while (CurPtr[0] == '0' || CurPtr[0] == '1')
++CurPtr;
return AsmToken(AsmToken::Integer, Result, Value);
}
- // Hex integer: 0x[0-9a-fA-F]+
if (*CurPtr == 'x') {
++CurPtr;
const char *NumStart = CurPtr;
while (isxdigit(CurPtr[0]))
++CurPtr;
- // Requires at least one hex digit.
+ // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
+ // diagnosed by LexHexFloatLiteral).
+ if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
+ return LexHexFloatLiteral(NumStart == CurPtr);
+
+ // Otherwise requires at least one hex digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "invalid hexadecimal number");
(int64_t)Result);
}
- // Binary: 0[01]*[Bb], but not 0b.
- // Octal: 0[0-7]*
- // Hexidecimal: [0][0-9a-fA-F]*[hH]
+ // Either octal or hexadecimal.
long long Value;
unsigned Radix = doLookAhead(CurPtr, 8);
- bool isBinary = Radix == 2;
- bool isOctal = Radix == 8;
+ bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
- return ReturnError(TokStart, isOctal ? "invalid octal number" :
- isBinary ? "invalid binary number" :
+ return ReturnError(TokStart, !isHex ? "invalid octal number" :
"invalid hexdecimal number");
- // Consume the [bB][hH].
- if (Radix == 2 || Radix == 16)
+ // Consume the [hH].
+ if (Radix == 16)
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL