1 /*===-- Lexer.l - Scanner for llvm assembly files --------------*- C++ -*--===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the flex scanner for LLVM assembly language files.
12 //===----------------------------------------------------------------------===*/
14 %option prefix="llvmAsm"
17 %option never-interactive
22 %option outfile="Lexer.cpp"
28 #include "ParserInternals.h"
29 #include "llvm/Module.h"
31 #include "llvmAsmParser.h"
// RET_TOK - Store Instruction::Enum into the `type` member of llvmAsmlval and
// return the token code `sym`. The expansion is two statements with no
// enclosing braces, so it is only safe where a statement sequence is allowed
// (for example inside a braced block).
35 #define RET_TOK(type, Enum, sym) \
36 llvmAsmlval.type = Instruction::Enum; return sym
39 // TODO: All of the static identifiers are figured out by the lexer,
40 // these should be hashed to reduce the lexer size
43 // atoull - Convert an ascii string of decimal digits into the unsigned long
44 // long representation... this does not have to do input error checking,
45 // because we know that the input will be matched by a suitable regex...
47 static uint64_t atoull(const char *Buffer) {
// Walk the string up to its terminating NUL; characters are not validated
// here.
49 for (; *Buffer; Buffer++) {
// Remember the value before this digit is folded in so that a drop can be
// detected below.
50 uint64_t OldRes = Result;
// Add the numeric value of the current digit character.
52 Result += *Buffer-'0';
53 if (Result < OldRes) // Uh, oh, overflow detected!!!
54 ThrowException("constant bigger than 64 bits detected!");
// HexIntToVal - Convert an ascii string of hexadecimal digits into a uint64_t.
// The characters 0-9, A-F and a-f are recognized. ThrowException is called
// when the accumulated value ends up smaller than it was at the start of an
// iteration.
59 static uint64_t HexIntToVal(const char *Buffer) {
// Walk the string up to its terminating NUL.
61 for (; *Buffer; ++Buffer) {
// Remember the value before this digit is folded in so that a drop can be
// detected below.
62 uint64_t OldRes = Result;
// Classify C by hex-digit range: decimal, upper-case, then lower-case.
65 if (C >= '0' && C <= '9')
67 else if (C >= 'A' && C <= 'F')
69 else if (C >= 'a' && C <= 'f')
72 if (Result < OldRes) // Uh, oh, overflow detected!!!
73 ThrowException("constant bigger than 64 bits detected!");
79 // HexToFP - Convert the ascii string in hexadecimal format to the floating
80 // point representation of it.
82 static double HexToFP(const char *Buffer) {
83 // Behave nicely in the face of C TBAA rules... see:
84 // http://www.nullstone.com/htmls/category/aliastyp.htm
// Parse the hex digits into the integer member of UIntToFP (presumably a
// union of uint64_t and double -- confirm against its declaration).
89 UIntToFP.UI = HexIntToVal(Buffer);
// Reading the value back as a double only makes sense when both types have
// the same size. Note that this check runs after the conversion above.
91 assert(sizeof(double) == sizeof(uint64_t) &&
92 "Data sizes incompatible on this target!");
93 return UIntToFP.FP; // Cast Hex constant to double
97 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
98 // appropriate character. If AllowNull is set to false, a \00 value will cause
99 // an exception to be thrown.
101 // If AllowNull is set to true, the return value of the function points to the
102 // last character of the string in memory.
104 char *UnEscapeLexed(char *Buffer, bool AllowNull) {
// Scan the input until its terminating NUL; the loop header has no increment,
// so BIn is advanced inside the body.
106 for (char *BIn = Buffer; *BIn; ) {
// An escape is a backslash followed by two hex digits.
// NOTE(review): isxdigit is handed a plain char here; <ctype.h> functions are
// only defined for unsigned char values and EOF, so a cast to unsigned char
// may be needed for bytes >= 0x80 -- confirm.
107 if (BIn[0] == '\\' && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
// Temporarily cut the string after the two digits so that strtol parses only
// those two characters.
108 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
109 *BOut = strtol(BIn+1, 0, 16); // Convert to number
// A decoded value of zero is rejected unless the caller allows it.
110 if (!AllowNull && !*BOut)
111 ThrowException("String literal cannot accept \\00 escape!");
113 BIn[3] = Tmp; // Restore character
114 BIn += 3; // Skip over handled chars
124 #define YY_NEVER_INTERACTIVE 1
129 /* Comments start with a ; and go till end of line */
132 /* Variable(Value) identifiers start with a % sign */
133 VarID %[-a-zA-Z$._][-a-zA-Z$._0-9]*
135 /* Label identifiers end with a colon */
136 Label [-a-zA-Z$._0-9]+:
138 /* Quoted names can contain any character except " (backslashes are allowed) */
139 StringConstant \"[^\"]*\"
142 /* E[PN]Integer: match positive and negative literal integer values that
143 * are preceded by a '%' character. These represent unnamed variable slots.
149 /* [PN]Integer: match positive and negative literal integer values */
153 /* FPConstant - A Floating point constant.
155 FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
157 /* HexFPConstant - Floating point constant represented in IEEE format as a
158 * hexadecimal number for when exponential notation is not precise enough.
160 HexFPConstant 0x[0-9A-Fa-f]+
162 /* HexIntConstant - Hexadecimal constant generated by the CFE to avoid forcing
163 * it to deal with 64 bit numbers.
165 HexIntConstant [us]0x[0-9A-Fa-f]+
168 {Comment} { /* Ignore comments for now */ }
170 begin { return BEGINTOK; }
171 end { return ENDTOK; }
172 true { return TRUE; }
173 false { return FALSE; }
174 declare { return DECLARE; }
175 global { return GLOBAL; }
176 constant { return CONSTANT; }
177 const { return CONST; }
178 internal { return INTERNAL; }
179 linkonce { return LINKONCE; }
180 weak { return WEAK; }
181 appending { return APPENDING; }
182 uninitialized { return EXTERNAL; } /* Deprecated, turn into external */
183 external { return EXTERNAL; }
184 implementation { return IMPLEMENTATION; }
185 zeroinitializer { return ZEROINITIALIZER; }
186 \.\.\. { return DOTDOTDOT; }
187 null { return NULL_TOK; }
189 except { return EXCEPT; }
190 not { return NOT; } /* Deprecated, turned into XOR */
191 target { return TARGET; }
192 endian { return ENDIAN; }
193 pointersize { return POINTERSIZE; }
194 little { return LITTLE; }
196 volatile { return VOLATILE; }
198 void { llvmAsmlval.PrimType = Type::VoidTy ; return VOID; }
199 bool { llvmAsmlval.PrimType = Type::BoolTy ; return BOOL; }
200 sbyte { llvmAsmlval.PrimType = Type::SByteTy ; return SBYTE; }
201 ubyte { llvmAsmlval.PrimType = Type::UByteTy ; return UBYTE; }
202 short { llvmAsmlval.PrimType = Type::ShortTy ; return SHORT; }
203 ushort { llvmAsmlval.PrimType = Type::UShortTy; return USHORT; }
204 int { llvmAsmlval.PrimType = Type::IntTy ; return INT; }
205 uint { llvmAsmlval.PrimType = Type::UIntTy ; return UINT; }
206 long { llvmAsmlval.PrimType = Type::LongTy ; return LONG; }
207 ulong { llvmAsmlval.PrimType = Type::ULongTy ; return ULONG; }
208 float { llvmAsmlval.PrimType = Type::FloatTy ; return FLOAT; }
209 double { llvmAsmlval.PrimType = Type::DoubleTy; return DOUBLE; }
210 type { llvmAsmlval.PrimType = Type::TypeTy ; return TYPE; }
211 label { llvmAsmlval.PrimType = Type::LabelTy ; return LABEL; }
212 opaque { return OPAQUE; }
214 add { RET_TOK(BinaryOpVal, Add, ADD); }
215 sub { RET_TOK(BinaryOpVal, Sub, SUB); }
216 mul { RET_TOK(BinaryOpVal, Mul, MUL); }
217 div { RET_TOK(BinaryOpVal, Div, DIV); }
218 rem { RET_TOK(BinaryOpVal, Rem, REM); }
219 and { RET_TOK(BinaryOpVal, And, AND); }
220 or { RET_TOK(BinaryOpVal, Or , OR ); }
221 xor { RET_TOK(BinaryOpVal, Xor, XOR); }
222 setne { RET_TOK(BinaryOpVal, SetNE, SETNE); }
223 seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
224 setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); }
225 setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); }
226 setle { RET_TOK(BinaryOpVal, SetLE, SETLE); }
227 setge { RET_TOK(BinaryOpVal, SetGE, SETGE); }
229 phi { RET_TOK(OtherOpVal, PHI, PHI_TOK); }
230 call { RET_TOK(OtherOpVal, Call, CALL); }
231 cast { RET_TOK(OtherOpVal, Cast, CAST); }
232 shl { RET_TOK(OtherOpVal, Shl, SHL); }
233 shr { RET_TOK(OtherOpVal, Shr, SHR); }
234 va_arg { return VA_ARG; /* FIXME: OBSOLETE */}
235 vanext { RET_TOK(OtherOpVal, VANext, VANEXT); }
236 vaarg { RET_TOK(OtherOpVal, VAArg , VAARG); }
238 ret { RET_TOK(TermOpVal, Ret, RET); }
239 br { RET_TOK(TermOpVal, Br, BR); }
240 switch { RET_TOK(TermOpVal, Switch, SWITCH); }
241 invoke { RET_TOK(TermOpVal, Invoke, INVOKE); }
242 unwind { RET_TOK(TermOpVal, Unwind, UNWIND); }
245 malloc { RET_TOK(MemOpVal, Malloc, MALLOC); }
246 alloca { RET_TOK(MemOpVal, Alloca, ALLOCA); }
247 free { RET_TOK(MemOpVal, Free, FREE); }
248 load { RET_TOK(MemOpVal, Load, LOAD); }
249 store { RET_TOK(MemOpVal, Store, STORE); }
250 getelementptr { RET_TOK(MemOpVal, GetElementPtr, GETELEMENTPTR); }
254 UnEscapeLexed(yytext+1);
255 llvmAsmlval.StrVal = strdup(yytext+1); // Skip %
259 yytext[strlen(yytext)-1] = 0; // nuke colon
260 UnEscapeLexed(yytext);
261 llvmAsmlval.StrVal = strdup(yytext);
265 {StringConstant} { // Note that we cannot unescape a string constant here! The
266 // string constant might contain a \00 which would not be
267 // understood by the string stuff. It is valid to make a
268 // [sbyte] c"Hello World\00" constant, for example.
270 yytext[strlen(yytext)-1] = 0; // nuke end quote
271 llvmAsmlval.StrVal = strdup(yytext+1); // Nuke start quote
272 return STRINGCONSTANT;
276 {PInteger} { llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; }
278 uint64_t Val = atoull(yytext+1);
279 // +1: we have bigger negative range
280 if (Val > (uint64_t)INT64_MAX+1)
281 ThrowException("Constant too large for signed 64 bits!");
282 llvmAsmlval.SInt64Val = -Val;
// yytext+3 skips a three-character prefix; presumably this is the action for
// HexIntConstant, whose pattern starts with [us]0x -- confirm.
286 llvmAsmlval.UInt64Val = HexIntToVal(yytext+3);
// The first character of the match selects the signed or unsigned token; the
// value is stored through UInt64Val in both cases.
287 return yytext[0] == 's' ? ESINT64VAL : EUINT64VAL;
290 {EPInteger} { llvmAsmlval.UIntVal = atoull(yytext+1); return UINTVAL; }
292 uint64_t Val = atoull(yytext+2);
293 // +1: we have bigger negative range
294 if (Val > (uint64_t)INT32_MAX+1)
295 ThrowException("Constant too large for signed 32 bits!");
296 llvmAsmlval.SIntVal = -Val;
300 {FPConstant} { llvmAsmlval.FPVal = atof(yytext); return FPVAL; }
301 {HexFPConstant} { llvmAsmlval.FPVal = HexToFP(yytext); return FPVAL; }
303 [ \t\n] { /* Ignore whitespace */ }
304 . { return yytext[0]; }