tools/llvm-upgrade/UpgradeLexer.l

   1 /*===-- UpgradeLexer.l - Scanner for 1.9 assembly files --------*- C++ -*--===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Reid Spencer and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
//  This file implements the flex scanner for LLVM 1.9 assembly language files.
  11 //  This doesn't handle long double constants, since LLVM 1.9 did not have them.
  12 //
  13 //===----------------------------------------------------------------------===*/
  14
  15 %option prefix="Upgrade"
  16 %option yylineno
  17 %option nostdinit
  18 %option never-interactive
  19 %option batch
  20 %option noyywrap
  21 %option nodefault
  22 %option 8bit
  23 %option outfile="UpgradeLexer.cpp"
  24 %option ecs
  25 %option noreject
  26 %option noyymore
  27
  28 %{
  29 #include "UpgradeInternals.h"
  30 #include "llvm/Module.h"
  31 #include <list>
  32 #include "UpgradeParser.h"
  33 #include <cctype>
  34 #include <cstdlib>
  35
  36 #define YY_INPUT(buf,result,max_size) \
  37 { \
  38   if (LexInput->good() && !LexInput->eof()) { \
  39     LexInput->read(buf,max_size); \
  40     result = LexInput->gcount(); \
  41   } else {\
  42     result = YY_NULL; \
  43   } \
  44 }
  45
  46 #define YY_NEVER_INTERACTIVE 1
  47
  48 // Construct a token value for a non-obsolete token
  49 #define RET_TOK(type, Enum, sym) \
  50   Upgradelval.type = Enum; \
  51   return sym
  52
  53 #define RET_TY(sym,NewTY,sign) \
  54   Upgradelval.PrimType.T = NewTY; \
  55   switch (sign) { \
  56     case 0: Upgradelval.PrimType.S.makeSignless(); break; \
  57     case 1: Upgradelval.PrimType.S.makeUnsigned(); break; \
  58     case 2: Upgradelval.PrimType.S.makeSigned(); break; \
  59     default: assert(0 && "Invalid sign kind"); break; \
  60   }\
  61   return sym
  62
  63 namespace llvm {
  64
  65 // TODO: All of the static identifiers are figured out by the lexer,
  66 // these should be hashed to reduce the lexer size
  67
  68 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
  69 // appropriate character.  If AllowNull is set to false, a \00 value will cause
  70 // an exception to be thrown.
  71 //
  72 // If AllowNull is set to true, the return value of the function points to the
  73 // last character of the string in memory.
  74 //
  75 char *UnEscapeLexed(char *Buffer, bool AllowNull) {
  76   char *BOut = Buffer;
  77   for (char *BIn = Buffer; *BIn; ) {
  78     if (BIn[0] == '\\' && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
  79       char Tmp = BIn[3]; BIn[3] = 0;     // Terminate string
  80       *BOut = (char)strtol(BIn+1, 0, 16);  // Convert to number
  81       if (!AllowNull && !*BOut)
  82         error("String literal cannot accept \\00 escape!");
  83
  84       BIn[3] = Tmp;                  // Restore character
  85       BIn += 3;                      // Skip over handled chars
  86       ++BOut;
  87     } else {
  88       *BOut++ = *BIn++;
  89     }
  90   }
  91
  92   return BOut;
  93 }
  94
  95 // atoull - Convert an ascii string of decimal digits into the unsigned long
  96 // long representation... this does not have to do input error checking,
  97 // because we know that the input will be matched by a suitable regex...
  98 //
  99 static uint64_t atoull(const char *Buffer) {
 100   uint64_t Result = 0;
 101   for (; *Buffer; Buffer++) {
 102     uint64_t OldRes = Result;
 103     Result *= 10;
 104     Result += *Buffer-'0';
 105     if (Result < OldRes)   // Uh, oh, overflow detected!!!
 106       error("constant bigger than 64 bits detected!");
 107   }
 108   return Result;
 109 }
 110
 111 static uint64_t HexIntToVal(const char *Buffer) {
 112   uint64_t Result = 0;
 113   for (; *Buffer; ++Buffer) {
 114     uint64_t OldRes = Result;
 115     Result *= 16;
 116     char C = *Buffer;
 117     if (C >= '0' && C <= '9')
 118       Result += C-'0';
 119     else if (C >= 'A' && C <= 'F')
 120       Result += C-'A'+10;
 121     else if (C >= 'a' && C <= 'f')
 122       Result += C-'a'+10;
 123
 124     if (Result < OldRes)   // Uh, oh, overflow detected!!!
 125       error("constant bigger than 64 bits detected!");
 126   }
 127   return Result;
 128 }
 129
 130
 131 // HexToFP - Convert the ascii string in hexidecimal format to the floating
 132 // point representation of it.
 133 //
 134 static double HexToFP(const char *Buffer) {
 135   // Behave nicely in the face of C TBAA rules... see:
 136   // http://www.nullstone.com/htmls/category/aliastyp.htm
 137   union {
 138     uint64_t UI;
 139     double FP;
 140   } UIntToFP;
 141   UIntToFP.UI = HexIntToVal(Buffer);
 142
 143   assert(sizeof(double) == sizeof(uint64_t) &&
 144          "Data sizes incompatible on this target!");
 145   return UIntToFP.FP;   // Cast Hex constant to double
 146 }
 147
 148
 149 } // End llvm namespace
 150
 151 using namespace llvm;
 152
 153 %}
 154
 155
 156
/* Comments start with a ; and go till end of line */
Comment    ;.*

/* Variable(Value) identifiers start with a % or @ sign */
VarID       [%@][-a-zA-Z$._][-a-zA-Z$._0-9]*

/* Label identifiers end with a colon */
Label       [-a-zA-Z$._0-9]+:
QuoteLabel \"[^\"]+\":

/* Quoted names can contain any character except a double quote (backslashes
 * are accepted).  An optional @ may precede the opening quote.
 */
StringConstant @?\"[^\"]*\"


/* E[PN]Integer: match positive and negative literal integer values that
 * are preceded by a '%' character.  These represent unnamed variable slots.
 */
EPInteger     %[0-9]+
ENInteger    %-[0-9]+


/* [PN]Integer: match positive and negative literal integer values */
PInteger   [0-9]+
NInteger  -[0-9]+

/* FPConstant - A Floating point constant: optional sign, digits, a mandatory
 * '.', optional fraction digits and an optional exponent.
 */
FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?

/* HexFPConstant - Floating point constant represented in IEEE format as a
 *  hexadecimal number for when exponential notation is not precise enough.
 */
HexFPConstant 0x[0-9A-Fa-f]+

/* HexIntConstant - Hexadecimal constant generated by the CFE to avoid forcing
 * it to deal with 64 bit numbers.  The leading 'u' or 's' selects whether the
 * scanner returns an unsigned or a signed 64-bit token.
 */
HexIntConstant [us]0x[0-9A-Fa-f]+
 195 %%
 196
{Comment}       { /* Ignore comments for now */ }

  /* Keywords that carry no semantic value: each rule only returns its
   * token code.
   */
begin           { return BEGINTOK; }
end             { return ENDTOK; }
true            { return TRUETOK;  }
false           { return FALSETOK; }
declare         { return DECLARE; }
global          { return GLOBAL; }
constant        { return CONSTANT; }
internal        { return INTERNAL; }
linkonce        { return LINKONCE; }
weak            { return WEAK; }
appending       { return APPENDING; }
dllimport       { return DLLIMPORT; }
dllexport       { return DLLEXPORT; }
extern_weak     { return EXTERN_WEAK; }
uninitialized   { return EXTERNAL; }    /* Deprecated, turn into external */
external        { return EXTERNAL; }
implementation  { return IMPLEMENTATION; }
zeroinitializer { return ZEROINITIALIZER; }
\.\.\.          { return DOTDOTDOT; }
undef           { return UNDEF; }
null            { return NULL_TOK; }
to              { return TO; }
except          { return EXCEPT; }
not             { return NOT; }  /* Deprecated, turned into XOR */
tail            { return TAIL; }
target          { return TARGET; }
triple          { return TRIPLE; }
deplibs         { return DEPLIBS; }
endian          { return ENDIAN; }
pointersize     { return POINTERSIZE; }
datalayout      { return DATALAYOUT; }
little          { return LITTLE; }
big             { return BIG; }
volatile        { return VOLATILE; }
align           { return ALIGN;  }
section         { return SECTION; }
module          { return MODULE; }
asm             { return ASM_TOK; }
sideeffect      { return SIDEEFFECT; }

  /* Calling convention keywords, returned as bare tokens. */
cc              { return CC_TOK; }
ccc             { return CCC_TOK; }
csretcc         { return CSRETCC_TOK; }
fastcc          { return FASTCC_TOK; }
coldcc          { return COLDCC_TOK; }
x86_stdcallcc   { return X86_STDCALLCC_TOK; }
x86_fastcallcc  { return X86_FASTCALLCC_TOK; }

  /* Primitive type keywords.  RET_TY stores the type in
   * Upgradelval.PrimType.T and sets the signedness from its last argument:
   * 0 = signless, 1 = unsigned, 2 = signed.  The iN spellings return the
   * same tokens as the unsigned (or bool) keywords of the same width.
   */
sbyte           { RET_TY(SBYTE,  Type::Int8Ty,  2); }
ubyte           { RET_TY(UBYTE,  Type::Int8Ty,  1); }
i8              { RET_TY(UBYTE,  Type::Int8Ty,  1); }
short           { RET_TY(SHORT,  Type::Int16Ty, 2); }
ushort          { RET_TY(USHORT, Type::Int16Ty, 1); }
i16             { RET_TY(USHORT, Type::Int16Ty, 1); }
int             { RET_TY(INT,    Type::Int32Ty, 2); }
uint            { RET_TY(UINT,   Type::Int32Ty, 1); }
i32             { RET_TY(UINT,   Type::Int32Ty, 1); }
long            { RET_TY(LONG,   Type::Int64Ty, 2); }
ulong           { RET_TY(ULONG,  Type::Int64Ty, 1); }
i64             { RET_TY(ULONG,  Type::Int64Ty, 1); }
void            { RET_TY(VOID,   Type::VoidTy,  0); }
bool            { RET_TY(BOOL,   Type::Int1Ty,  1); }
i1              { RET_TY(BOOL,   Type::Int1Ty,  1); }
float           { RET_TY(FLOAT,  Type::FloatTy, 0); }
double          { RET_TY(DOUBLE, Type::DoubleTy,0); }
label           { RET_TY(LABEL,  Type::LabelTy, 0); }
type            { return TYPE;   }
opaque          { return OPAQUE; }

  /* Binary operator keywords: RET_TOK stores the operator in
   * Upgradelval.BinaryOpVal and returns the token code.
   */
add             { RET_TOK(BinaryOpVal, AddOp, ADD); }
sub             { RET_TOK(BinaryOpVal, SubOp, SUB); }
mul             { RET_TOK(BinaryOpVal, MulOp, MUL); }
div             { RET_TOK(BinaryOpVal, DivOp,  DIV); }
udiv            { RET_TOK(BinaryOpVal, UDivOp, UDIV); }
sdiv            { RET_TOK(BinaryOpVal, SDivOp, SDIV); }
fdiv            { RET_TOK(BinaryOpVal, FDivOp, FDIV); }
rem             { RET_TOK(BinaryOpVal, RemOp,  REM); }
urem            { RET_TOK(BinaryOpVal, URemOp, UREM); }
srem            { RET_TOK(BinaryOpVal, SRemOp, SREM); }
frem            { RET_TOK(BinaryOpVal, FRemOp, FREM); }
and             { RET_TOK(BinaryOpVal, AndOp, AND); }
or              { RET_TOK(BinaryOpVal, OrOp , OR ); }
xor             { RET_TOK(BinaryOpVal, XorOp, XOR); }
setne           { RET_TOK(BinaryOpVal, SetNE, SETNE); }
seteq           { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
setlt           { RET_TOK(BinaryOpVal, SetLT, SETLT); }
setgt           { RET_TOK(BinaryOpVal, SetGT, SETGT); }
setle           { RET_TOK(BinaryOpVal, SetLE, SETLE); }
setge           { RET_TOK(BinaryOpVal, SetGE, SETGE); }
shl             { RET_TOK(BinaryOpVal, ShlOp, SHL); }
shr             { RET_TOK(BinaryOpVal, ShrOp, SHR); }
lshr            { RET_TOK(BinaryOpVal, LShrOp, LSHR); }
ashr            { RET_TOK(BinaryOpVal, AShrOp, ASHR); }

  /* Comparison instructions, recorded in Upgradelval.OtherOpVal. */
icmp            { RET_TOK(OtherOpVal, ICmpOp, ICMP); }
fcmp            { RET_TOK(OtherOpVal, FCmpOp, FCMP); }

  /* Comparison keywords (eq, ne, slt, ...), returned as bare tokens. */
eq              { return EQ; }
ne              { return NE; }
slt             { return SLT; }
sgt             { return SGT; }
sle             { return SLE; }
sge             { return SGE; }
ult             { return ULT; }
ugt             { return UGT; }
ule             { return ULE; }
uge             { return UGE; }
oeq             { return OEQ; }
one             { return ONE; }
olt             { return OLT; }
ogt             { return OGT; }
ole             { return OLE; }
oge             { return OGE; }
ord             { return ORD; }
uno             { return UNO; }
ueq             { return UEQ; }
une             { return UNE; }

  /* Other, cast and terminator operator keywords.  vanext, vaarg and unwind
   * return bare tokens without setting a semantic value.
   */
phi             { RET_TOK(OtherOpVal, PHIOp, PHI_TOK); }
call            { RET_TOK(OtherOpVal, CallOp, CALL); }
cast            { RET_TOK(CastOpVal, CastOp, CAST);  }
trunc           { RET_TOK(CastOpVal, TruncOp, TRUNC); }
zext            { RET_TOK(CastOpVal, ZExtOp , ZEXT); }
sext            { RET_TOK(CastOpVal, SExtOp, SEXT); }
fptrunc         { RET_TOK(CastOpVal, FPTruncOp, FPTRUNC); }
fpext           { RET_TOK(CastOpVal, FPExtOp, FPEXT); }
fptoui          { RET_TOK(CastOpVal, FPToUIOp, FPTOUI); }
fptosi          { RET_TOK(CastOpVal, FPToSIOp, FPTOSI); }
uitofp          { RET_TOK(CastOpVal, UIToFPOp, UITOFP); }
sitofp          { RET_TOK(CastOpVal, SIToFPOp, SITOFP); }
ptrtoint        { RET_TOK(CastOpVal, PtrToIntOp, PTRTOINT); }
inttoptr        { RET_TOK(CastOpVal, IntToPtrOp, INTTOPTR); }
bitcast         { RET_TOK(CastOpVal, BitCastOp, BITCAST); }
select          { RET_TOK(OtherOpVal, SelectOp, SELECT); }
vanext          { return VANEXT_old; }
vaarg           { return VAARG_old; }
va_arg          { RET_TOK(OtherOpVal, VAArg , VAARG); }
ret             { RET_TOK(TermOpVal, RetOp, RET); }
br              { RET_TOK(TermOpVal, BrOp, BR); }
switch          { RET_TOK(TermOpVal, SwitchOp, SWITCH); }
invoke          { RET_TOK(TermOpVal, InvokeOp, INVOKE); }
unwind          { return UNWIND; }
unreachable     { RET_TOK(TermOpVal, UnreachableOp, UNREACHABLE); }

  /* Memory operator keywords, recorded in Upgradelval.MemOpVal. */
malloc          { RET_TOK(MemOpVal, MallocOp, MALLOC); }
alloca          { RET_TOK(MemOpVal, AllocaOp, ALLOCA); }
free            { RET_TOK(MemOpVal, FreeOp, FREE); }
load            { RET_TOK(MemOpVal, LoadOp, LOAD); }
store           { RET_TOK(MemOpVal, StoreOp, STORE); }
getelementptr   { RET_TOK(MemOpVal, GetElementPtrOp, GETELEMENTPTR); }

  /* Vector element operator keywords, recorded in Upgradelval.OtherOpVal. */
extractelement  { RET_TOK(OtherOpVal, ExtractElementOp, EXTRACTELEMENT); }
insertelement   { RET_TOK(OtherOpVal, InsertElementOp, INSERTELEMENT); }
shufflevector   { RET_TOK(OtherOpVal, ShuffleVectorOp, SHUFFLEVECTOR); }
 353
 354
 355 {VarID}         {
 356                   UnEscapeLexed(yytext+1);
 357                   Upgradelval.StrVal = strdup(yytext+1);             // Skip %
 358                   return VAR_ID;
 359                 }
 360 {Label}         {
 361                   yytext[strlen(yytext)-1] = 0;  // nuke colon
 362                   UnEscapeLexed(yytext);
 363                   Upgradelval.StrVal = strdup(yytext);
 364                   return LABELSTR;
 365                 }
 366 {QuoteLabel}    {
 367                   yytext[strlen(yytext)-2] = 0;  // nuke colon, end quote
 368                   UnEscapeLexed(yytext+1);
 369                   Upgradelval.StrVal = strdup(yytext+1);
 370                   return LABELSTR;
 371                 }
 372
 373 {StringConstant} { // Note that we cannot unescape a string constant here!  The
 374                    // string constant might contain a \00 which would not be
 375                    // understood by the string stuff.  It is valid to make a
 376                    // [sbyte] c"Hello World\00" constant, for example.
 377                    //
 378                    yytext[strlen(yytext)-1] = 0;           // nuke end quote
 379                    Upgradelval.StrVal = strdup(yytext+1);  // Nuke start quote
 380                    return STRINGCONSTANT;
 381                  }
 382
 383
 384 {PInteger}      { Upgradelval.UInt64Val = atoull(yytext); return EUINT64VAL; }
 385 {NInteger}      {
 386                   uint64_t Val = atoull(yytext+1);
 387                   // +1:  we have bigger negative range
 388                   if (Val > (uint64_t)INT64_MAX+1)
 389                     error("Constant too large for signed 64 bits!");
 390                   Upgradelval.SInt64Val = -Val;
 391                   return ESINT64VAL;
 392                 }
 393 {HexIntConstant} {
 394                    Upgradelval.UInt64Val = HexIntToVal(yytext+3);
 395                    return yytext[0] == 's' ? ESINT64VAL : EUINT64VAL;
 396                  }
 397
 398 {EPInteger}     {
 399                   uint64_t Val = atoull(yytext+1);
 400                   if ((unsigned)Val != Val)
 401                     error("Invalid value number (too large)!");
 402                   Upgradelval.UIntVal = unsigned(Val);
 403                   return UINTVAL;
 404                 }
 405 {ENInteger}     {
 406                   uint64_t Val = atoull(yytext+2);
 407                   // +1:  we have bigger negative range
 408                   if (Val > (uint64_t)INT32_MAX+1)
 409                     error("Constant too large for signed 32 bits!");
 410                   Upgradelval.SIntVal = (int)-Val;
 411                   return SINTVAL;
 412                 }
 413
 414 {FPConstant}    { Upgradelval.FPVal = new APFloat(atof(yytext)); return FPVAL; }
 415 {HexFPConstant} { Upgradelval.FPVal = new APFloat(HexToFP(yytext));
 416                   return FPVAL;
 417                 }
 418
 419 <<EOF>>         {
 420                   /* Make sure to free the internal buffers for flex when we are
 421                    * done reading our input!
 422                    */
 423                   yy_delete_buffer(YY_CURRENT_BUFFER);
 424                   return EOF;
 425                 }
 426
 427 [ \r\t\n]       { /* Ignore whitespace */ }
 428 .               { return yytext[0]; }
 429
 430 %%