lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "X86Subtarget.h"
  13 #include "llvm/ADT/SmallString.h"
  14 #include "llvm/ADT/SmallVector.h"
  15 #include "llvm/ADT/StringSwitch.h"
  16 #include "llvm/ADT/Twine.h"
  17 #include "llvm/MC/MCStreamer.h"
  18 #include "llvm/MC/MCExpr.h"
  19 #include "llvm/MC/MCInst.h"
  20 #include "llvm/MC/MCParser/MCAsmLexer.h"
  21 #include "llvm/MC/MCParser/MCAsmParser.h"
  22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  23 #include "llvm/Support/SourceMgr.h"
  24 #include "llvm/Support/raw_ostream.h"
  25 #include "llvm/Target/TargetRegistry.h"
  26 #include "llvm/Target/TargetAsmParser.h"
  27 using namespace llvm;
  28
  29 namespace {
  30 struct X86Operand;
  31
  32 class X86ATTAsmParser : public TargetAsmParser {
  33   MCAsmParser &Parser;
  34   TargetMachine &TM;
  35
  36 protected:
  37   unsigned Is64Bit : 1;
  38
  39 private:
  40   MCAsmParser &getParser() const { return Parser; }
  41
  42   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  43
  44   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  45
  46   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  47
  48   X86Operand *ParseOperand();
  49   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  50
  51   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  52
  53   bool MatchInstruction(SMLoc IDLoc,
  54                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  55                         MCInst &Inst);
  56
  57   /// @name Auto-generated Matcher Functions
  58   /// {
  59
  60 #define GET_ASSEMBLER_HEADER
  61 #include "X86GenAsmMatcher.inc"
  62
  63   /// }
  64
  65 public:
  66   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  67     : TargetAsmParser(T), Parser(_Parser), TM(TM) {
  68
  69     // Initialize the set of available features.
  70     setAvailableFeatures(ComputeAvailableFeatures(
  71                            &TM.getSubtarget<X86Subtarget>()));
  72   }
  73
  74   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
  75                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  76
  77   virtual bool ParseDirective(AsmToken DirectiveID);
  78 };
  79
  80 class X86_32ATTAsmParser : public X86ATTAsmParser {
  81 public:
  82   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  83     : X86ATTAsmParser(T, _Parser, TM) {
  84     Is64Bit = false;
  85   }
  86 };
  87
  88 class X86_64ATTAsmParser : public X86ATTAsmParser {
  89 public:
  90   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  91     : X86ATTAsmParser(T, _Parser, TM) {
  92     Is64Bit = true;
  93   }
  94 };
  95
  96 } // end anonymous namespace
  97
  98 /// @name Auto-generated Match Functions
  99 /// {
 100
 101 static unsigned MatchRegisterName(StringRef Name);
 102
 103 /// }
 104
 105 namespace {
 106
 107 /// X86Operand - Instances of this class represent a parsed X86 machine
 108 /// instruction.
 109 struct X86Operand : public MCParsedAsmOperand {
 110   enum KindTy {
 111     Token,
 112     Register,
 113     Immediate,
 114     Memory
 115   } Kind;
 116
 117   SMLoc StartLoc, EndLoc;
 118
 119   union {
 120     struct {
 121       const char *Data;
 122       unsigned Length;
 123     } Tok;
 124
 125     struct {
 126       unsigned RegNo;
 127     } Reg;
 128
 129     struct {
 130       const MCExpr *Val;
 131     } Imm;
 132
 133     struct {
 134       unsigned SegReg;
 135       const MCExpr *Disp;
 136       unsigned BaseReg;
 137       unsigned IndexReg;
 138       unsigned Scale;
 139     } Mem;
 140   };
 141
 142   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 143     : Kind(K), StartLoc(Start), EndLoc(End) {}
 144
 145   /// getStartLoc - Get the location of the first token of this operand.
 146   SMLoc getStartLoc() const { return StartLoc; }
 147   /// getEndLoc - Get the location of the last token of this operand.
 148   SMLoc getEndLoc() const { return EndLoc; }
 149
 150   virtual void dump(raw_ostream &OS) const {}
 151
 152   StringRef getToken() const {
 153     assert(Kind == Token && "Invalid access!");
 154     return StringRef(Tok.Data, Tok.Length);
 155   }
 156   void setTokenValue(StringRef Value) {
 157     assert(Kind == Token && "Invalid access!");
 158     Tok.Data = Value.data();
 159     Tok.Length = Value.size();
 160   }
 161
 162   unsigned getReg() const {
 163     assert(Kind == Register && "Invalid access!");
 164     return Reg.RegNo;
 165   }
 166
 167   const MCExpr *getImm() const {
 168     assert(Kind == Immediate && "Invalid access!");
 169     return Imm.Val;
 170   }
 171
 172   const MCExpr *getMemDisp() const {
 173     assert(Kind == Memory && "Invalid access!");
 174     return Mem.Disp;
 175   }
 176   unsigned getMemSegReg() const {
 177     assert(Kind == Memory && "Invalid access!");
 178     return Mem.SegReg;
 179   }
 180   unsigned getMemBaseReg() const {
 181     assert(Kind == Memory && "Invalid access!");
 182     return Mem.BaseReg;
 183   }
 184   unsigned getMemIndexReg() const {
 185     assert(Kind == Memory && "Invalid access!");
 186     return Mem.IndexReg;
 187   }
 188   unsigned getMemScale() const {
 189     assert(Kind == Memory && "Invalid access!");
 190     return Mem.Scale;
 191   }
 192
 193   bool isToken() const {return Kind == Token; }
 194
 195   bool isImm() const { return Kind == Immediate; }
 196
 197   bool isImmSExti16i8() const {
 198     if (!isImm())
 199       return false;
 200
 201     // If this isn't a constant expr, just assume it fits and let relaxation
 202     // handle it.
 203     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 204     if (!CE)
 205       return true;
 206
 207     // Otherwise, check the value is in a range that makes sense for this
 208     // extension.
 209     uint64_t Value = CE->getValue();
 210     return ((                                  Value <= 0x000000000000007FULL)||
 211             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 212             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 213   }
 214   bool isImmSExti32i8() const {
 215     if (!isImm())
 216       return false;
 217
 218     // If this isn't a constant expr, just assume it fits and let relaxation
 219     // handle it.
 220     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 221     if (!CE)
 222       return true;
 223
 224     // Otherwise, check the value is in a range that makes sense for this
 225     // extension.
 226     uint64_t Value = CE->getValue();
 227     return ((                                  Value <= 0x000000000000007FULL)||
 228             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 229             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 230   }
 231   bool isImmSExti64i8() const {
 232     if (!isImm())
 233       return false;
 234
 235     // If this isn't a constant expr, just assume it fits and let relaxation
 236     // handle it.
 237     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 238     if (!CE)
 239       return true;
 240
 241     // Otherwise, check the value is in a range that makes sense for this
 242     // extension.
 243     uint64_t Value = CE->getValue();
 244     return ((                                  Value <= 0x000000000000007FULL)||
 245             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 246   }
 247   bool isImmSExti64i32() const {
 248     if (!isImm())
 249       return false;
 250
 251     // If this isn't a constant expr, just assume it fits and let relaxation
 252     // handle it.
 253     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 254     if (!CE)
 255       return true;
 256
 257     // Otherwise, check the value is in a range that makes sense for this
 258     // extension.
 259     uint64_t Value = CE->getValue();
 260     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 261             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 262   }
 263
 264   bool isMem() const { return Kind == Memory; }
 265
 266   bool isAbsMem() const {
 267     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 268       !getMemIndexReg() && getMemScale() == 1;
 269   }
 270
 271   bool isReg() const { return Kind == Register; }
 272
 273   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 274     // Add as immediates when possible.
 275     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 276       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 277     else
 278       Inst.addOperand(MCOperand::CreateExpr(Expr));
 279   }
 280
 281   void addRegOperands(MCInst &Inst, unsigned N) const {
 282     assert(N == 1 && "Invalid number of operands!");
 283     Inst.addOperand(MCOperand::CreateReg(getReg()));
 284   }
 285
 286   void addImmOperands(MCInst &Inst, unsigned N) const {
 287     assert(N == 1 && "Invalid number of operands!");
 288     addExpr(Inst, getImm());
 289   }
 290
 291   void addMemOperands(MCInst &Inst, unsigned N) const {
 292     assert((N == 5) && "Invalid number of operands!");
 293     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 294     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 295     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 296     addExpr(Inst, getMemDisp());
 297     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 298   }
 299
 300   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 301     assert((N == 1) && "Invalid number of operands!");
 302     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 303   }
 304
 305   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 306     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 307     Res->Tok.Data = Str.data();
 308     Res->Tok.Length = Str.size();
 309     return Res;
 310   }
 311
 312   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 313     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 314     Res->Reg.RegNo = RegNo;
 315     return Res;
 316   }
 317
 318   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 319     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 320     Res->Imm.Val = Val;
 321     return Res;
 322   }
 323
 324   /// Create an absolute memory operand.
 325   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 326                                SMLoc EndLoc) {
 327     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 328     Res->Mem.SegReg   = 0;
 329     Res->Mem.Disp     = Disp;
 330     Res->Mem.BaseReg  = 0;
 331     Res->Mem.IndexReg = 0;
 332     Res->Mem.Scale    = 1;
 333     return Res;
 334   }
 335
 336   /// Create a generalized memory operand.
 337   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 338                                unsigned BaseReg, unsigned IndexReg,
 339                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 340     // We should never just have a displacement, that should be parsed as an
 341     // absolute memory operand.
 342     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 343
 344     // The scale should always be one of {1,2,4,8}.
 345     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 346            "Invalid scale!");
 347     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 348     Res->Mem.SegReg   = SegReg;
 349     Res->Mem.Disp     = Disp;
 350     Res->Mem.BaseReg  = BaseReg;
 351     Res->Mem.IndexReg = IndexReg;
 352     Res->Mem.Scale    = Scale;
 353     return Res;
 354   }
 355 };
 356
 357 } // end anonymous namespace.
 358
 359
 360 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 361                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 362   RegNo = 0;
 363   const AsmToken &TokPercent = Parser.getTok();
 364   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 365   StartLoc = TokPercent.getLoc();
 366   Parser.Lex(); // Eat percent token.
 367
 368   const AsmToken &Tok = Parser.getTok();
 369   if (Tok.isNot(AsmToken::Identifier))
 370     return Error(Tok.getLoc(), "invalid register name");
 371
 372   // FIXME: Validate register for the current architecture; we have to do
 373   // validation later, so maybe there is no need for this here.
 374   RegNo = MatchRegisterName(Tok.getString());
 375
 376   // FIXME: This should be done using Requires<In32BitMode> and
 377   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
 378   // can be also checked.
 379   if (RegNo == X86::RIZ && !Is64Bit)
 380     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 381
 382   // Parse %st(1) and "%st" as "%st(0)"
 383   if (RegNo == 0 && Tok.getString() == "st") {
 384     RegNo = X86::ST0;
 385     EndLoc = Tok.getLoc();
 386     Parser.Lex(); // Eat 'st'
 387
 388     // Check to see if we have '(4)' after %st.
 389     if (getLexer().isNot(AsmToken::LParen))
 390       return false;
 391     // Lex the paren.
 392     getParser().Lex();
 393
 394     const AsmToken &IntTok = Parser.getTok();
 395     if (IntTok.isNot(AsmToken::Integer))
 396       return Error(IntTok.getLoc(), "expected stack index");
 397     switch (IntTok.getIntVal()) {
 398     case 0: RegNo = X86::ST0; break;
 399     case 1: RegNo = X86::ST1; break;
 400     case 2: RegNo = X86::ST2; break;
 401     case 3: RegNo = X86::ST3; break;
 402     case 4: RegNo = X86::ST4; break;
 403     case 5: RegNo = X86::ST5; break;
 404     case 6: RegNo = X86::ST6; break;
 405     case 7: RegNo = X86::ST7; break;
 406     default: return Error(IntTok.getLoc(), "invalid stack index");
 407     }
 408
 409     if (getParser().Lex().isNot(AsmToken::RParen))
 410       return Error(Parser.getTok().getLoc(), "expected ')'");
 411
 412     EndLoc = Tok.getLoc();
 413     Parser.Lex(); // Eat ')'
 414     return false;
 415   }
 416
 417   // If this is "db[0-7]", match it as an alias
 418   // for dr[0-7].
 419   if (RegNo == 0 && Tok.getString().size() == 3 &&
 420       Tok.getString().startswith("db")) {
 421     switch (Tok.getString()[2]) {
 422     case '0': RegNo = X86::DR0; break;
 423     case '1': RegNo = X86::DR1; break;
 424     case '2': RegNo = X86::DR2; break;
 425     case '3': RegNo = X86::DR3; break;
 426     case '4': RegNo = X86::DR4; break;
 427     case '5': RegNo = X86::DR5; break;
 428     case '6': RegNo = X86::DR6; break;
 429     case '7': RegNo = X86::DR7; break;
 430     }
 431
 432     if (RegNo != 0) {
 433       EndLoc = Tok.getLoc();
 434       Parser.Lex(); // Eat it.
 435       return false;
 436     }
 437   }
 438
 439   if (RegNo == 0)
 440     return Error(Tok.getLoc(), "invalid register name");
 441
 442   EndLoc = Tok.getLoc();
 443   Parser.Lex(); // Eat identifier token.
 444   return false;
 445 }
 446
 447 X86Operand *X86ATTAsmParser::ParseOperand() {
 448   switch (getLexer().getKind()) {
 449   default:
 450     // Parse a memory operand with no segment register.
 451     return ParseMemOperand(0, Parser.getTok().getLoc());
 452   case AsmToken::Percent: {
 453     // Read the register.
 454     unsigned RegNo;
 455     SMLoc Start, End;
 456     if (ParseRegister(RegNo, Start, End)) return 0;
 457     if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
 458       Error(Start, "eiz and riz can only be used as index registers");
 459       return 0;
 460     }
 461
 462     // If this is a segment register followed by a ':', then this is the start
 463     // of a memory reference, otherwise this is a normal register reference.
 464     if (getLexer().isNot(AsmToken::Colon))
 465       return X86Operand::CreateReg(RegNo, Start, End);
 466
 467
 468     getParser().Lex(); // Eat the colon.
 469     return ParseMemOperand(RegNo, Start);
 470   }
 471   case AsmToken::Dollar: {
 472     // $42 -> immediate.
 473     SMLoc Start = Parser.getTok().getLoc(), End;
 474     Parser.Lex();
 475     const MCExpr *Val;
 476     if (getParser().ParseExpression(Val, End))
 477       return 0;
 478     return X86Operand::CreateImm(Val, Start, End);
 479   }
 480   }
 481 }
 482
 483 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 484 /// has already been parsed if present.
 485 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 486
 487   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 488   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 489   // only way to do this without lookahead is to eat the '(' and see what is
 490   // after it.
 491   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 492   if (getLexer().isNot(AsmToken::LParen)) {
 493     SMLoc ExprEnd;
 494     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 495
 496     // After parsing the base expression we could either have a parenthesized
 497     // memory address or not.  If not, return now.  If so, eat the (.
 498     if (getLexer().isNot(AsmToken::LParen)) {
 499       // Unless we have a segment register, treat this as an immediate.
 500       if (SegReg == 0)
 501         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 502       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 503     }
 504
 505     // Eat the '('.
 506     Parser.Lex();
 507   } else {
 508     // Okay, we have a '('.  We don't know if this is an expression or not, but
 509     // so we have to eat the ( to see beyond it.
 510     SMLoc LParenLoc = Parser.getTok().getLoc();
 511     Parser.Lex(); // Eat the '('.
 512
 513     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 514       // Nothing to do here, fall into the code below with the '(' part of the
 515       // memory operand consumed.
 516     } else {
 517       SMLoc ExprEnd;
 518
 519       // It must be an parenthesized expression, parse it now.
 520       if (getParser().ParseParenExpression(Disp, ExprEnd))
 521         return 0;
 522
 523       // After parsing the base expression we could either have a parenthesized
 524       // memory address or not.  If not, return now.  If so, eat the (.
 525       if (getLexer().isNot(AsmToken::LParen)) {
 526         // Unless we have a segment register, treat this as an immediate.
 527         if (SegReg == 0)
 528           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 529         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 530       }
 531
 532       // Eat the '('.
 533       Parser.Lex();
 534     }
 535   }
 536
 537   // If we reached here, then we just ate the ( of the memory operand.  Process
 538   // the rest of the memory operand.
 539   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 540
 541   if (getLexer().is(AsmToken::Percent)) {
 542     SMLoc L;
 543     if (ParseRegister(BaseReg, L, L)) return 0;
 544     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
 545       Error(L, "eiz and riz can only be used as index registers");
 546       return 0;
 547     }
 548   }
 549
 550   if (getLexer().is(AsmToken::Comma)) {
 551     Parser.Lex(); // Eat the comma.
 552
 553     // Following the comma we should have either an index register, or a scale
 554     // value. We don't support the later form, but we want to parse it
 555     // correctly.
 556     //
 557     // Not that even though it would be completely consistent to support syntax
 558     // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
 559     if (getLexer().is(AsmToken::Percent)) {
 560       SMLoc L;
 561       if (ParseRegister(IndexReg, L, L)) return 0;
 562
 563       if (getLexer().isNot(AsmToken::RParen)) {
 564         // Parse the scale amount:
 565         //  ::= ',' [scale-expression]
 566         if (getLexer().isNot(AsmToken::Comma)) {
 567           Error(Parser.getTok().getLoc(),
 568                 "expected comma in scale expression");
 569           return 0;
 570         }
 571         Parser.Lex(); // Eat the comma.
 572
 573         if (getLexer().isNot(AsmToken::RParen)) {
 574           SMLoc Loc = Parser.getTok().getLoc();
 575
 576           int64_t ScaleVal;
 577           if (getParser().ParseAbsoluteExpression(ScaleVal))
 578             return 0;
 579
 580           // Validate the scale amount.
 581           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 582             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 583             return 0;
 584           }
 585           Scale = (unsigned)ScaleVal;
 586         }
 587       }
 588     } else if (getLexer().isNot(AsmToken::RParen)) {
 589       // A scale amount without an index is ignored.
 590       // index.
 591       SMLoc Loc = Parser.getTok().getLoc();
 592
 593       int64_t Value;
 594       if (getParser().ParseAbsoluteExpression(Value))
 595         return 0;
 596
 597       if (Value != 1)
 598         Warning(Loc, "scale factor without index register is ignored");
 599       Scale = 1;
 600     }
 601   }
 602
 603   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 604   if (getLexer().isNot(AsmToken::RParen)) {
 605     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 606     return 0;
 607   }
 608   SMLoc MemEnd = Parser.getTok().getLoc();
 609   Parser.Lex(); // Eat the ')'.
 610
 611   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 612                                MemStart, MemEnd);
 613 }
 614
 615 bool X86ATTAsmParser::
 616 ParseInstruction(StringRef Name, SMLoc NameLoc,
 617                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 618   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 619   // represent alternative syntaxes in the .td file, without requiring
 620   // instruction duplication.
 621   StringRef PatchedName = StringSwitch<StringRef>(Name)
 622     .Case("sal", "shl")
 623     .Case("salb", "shlb")
 624     .Case("sall", "shll")
 625     .Case("salq", "shlq")
 626     .Case("salw", "shlw")
 627     .Case("repe", "rep")
 628     .Case("repz", "rep")
 629     .Case("repnz", "repne")
 630     .Case("iret", "iretl")
 631     .Case("sysret", "sysretl")
 632     .Case("push", Is64Bit ? "pushq" : "pushl")
 633     .Case("pop", Is64Bit ? "popq" : "popl")
 634     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 635     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 636     .Case("pushfd", "pushfl")
 637     .Case("popfd",  "popfl")
 638     .Case("retl", Is64Bit ? "retl" : "ret")
 639     .Case("retq", Is64Bit ? "ret" : "retq")
 640     .Case("setz", "sete")  .Case("setnz", "setne")
 641     .Case("setc", "setb")  .Case("setna", "setbe")
 642     .Case("setnae", "setb").Case("setnb", "setae")
 643     .Case("setnbe", "seta").Case("setnc", "setae")
 644     .Case("setng", "setle").Case("setnge", "setl")
 645     .Case("setnl", "setge").Case("setnle", "setg")
 646     .Case("setpe", "setp") .Case("setpo", "setnp")
 647     .Case("jz", "je")  .Case("jnz", "jne")
 648     .Case("jc", "jb")  .Case("jna", "jbe")
 649     .Case("jnae", "jb").Case("jnb", "jae")
 650     .Case("jnbe", "ja").Case("jnc", "jae")
 651     .Case("jng", "jle").Case("jnge", "jl")
 652     .Case("jnl", "jge").Case("jnle", "jg")
 653     .Case("jpe", "jp") .Case("jpo", "jnp")
 654     // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
 655     .Case("cmovcw",  "cmovbw") .Case("cmovcl",  "cmovbl")
 656     .Case("cmovcq",  "cmovbq") .Case("cmovc",   "cmovb")
 657     .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
 658     .Case("cmovnaq", "cmovbeq").Case("cmovna",  "cmovbe")
 659     .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
 660     .Case("cmovnbq", "cmovaeq").Case("cmovnb",  "cmovae")
 661     .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
 662     .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
 663     .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
 664     .Case("cmovncq", "cmovaeq").Case("cmovnc",  "cmovae")
 665     .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
 666     .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
 667     .Case("cmovnw",  "cmovgew").Case("cmovnl",  "cmovgel")
 668     .Case("cmovnq",  "cmovgeq").Case("cmovn",   "cmovge")
 669     .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
 670     .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
 671     .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
 672     .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
 673     .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
 674     .Case("cmovnlq", "cmovgeq").Case("cmovnl",  "cmovge")
 675     .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
 676     .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
 677     .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
 678     .Case("cmovnzq", "cmovneq").Case("cmovnz",  "cmovne")
 679     .Case("cmovzw",  "cmovew") .Case("cmovzl",  "cmovel")
 680     .Case("cmovzq",  "cmoveq") .Case("cmovz",   "cmove")
 681     .Case("fwait", "wait")
 682     .Case("movzx", "movzb")
 683     .Default(Name);
 684
 685   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 686   const MCExpr *ExtraImmOp = 0;
 687   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 688       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 689        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 690     bool IsVCMP = PatchedName.startswith("vcmp");
 691     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 692     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 693       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 694       .Case("eq",          0)
 695       .Case("lt",          1)
 696       .Case("le",          2)
 697       .Case("unord",       3)
 698       .Case("neq",         4)
 699       .Case("nlt",         5)
 700       .Case("nle",         6)
 701       .Case("ord",         7)
 702       .Case("eq_uq",       8)
 703       .Case("nge",         9)
 704       .Case("ngt",      0x0A)
 705       .Case("false",    0x0B)
 706       .Case("neq_oq",   0x0C)
 707       .Case("ge",       0x0D)
 708       .Case("gt",       0x0E)
 709       .Case("true",     0x0F)
 710       .Case("eq_os",    0x10)
 711       .Case("lt_oq",    0x11)
 712       .Case("le_oq",    0x12)
 713       .Case("unord_s",  0x13)
 714       .Case("neq_us",   0x14)
 715       .Case("nlt_uq",   0x15)
 716       .Case("nle_uq",   0x16)
 717       .Case("ord_s",    0x17)
 718       .Case("eq_us",    0x18)
 719       .Case("nge_uq",   0x19)
 720       .Case("ngt_uq",   0x1A)
 721       .Case("false_os", 0x1B)
 722       .Case("neq_os",   0x1C)
 723       .Case("ge_oq",    0x1D)
 724       .Case("gt_oq",    0x1E)
 725       .Case("true_us",  0x1F)
 726       .Default(~0U);
 727     if (SSEComparisonCode != ~0U) {
 728       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 729                                           getParser().getContext());
 730       if (PatchedName.endswith("ss")) {
 731         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 732       } else if (PatchedName.endswith("sd")) {
 733         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 734       } else if (PatchedName.endswith("ps")) {
 735         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 736       } else {
 737         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 738         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 739       }
 740     }
 741   }
 742
 743   // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
 744   if (PatchedName.startswith("vpclmul")) {
 745     unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
 746       PatchedName.slice(7, PatchedName.size() - 2))
 747       .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
 748       .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
 749       .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
 750       .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
 751       .Default(~0U);
 752     if (CLMULQuadWordSelect != ~0U) {
 753       ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
 754                                           getParser().getContext());
 755       assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
 756       PatchedName = "vpclmulqdq";
 757     }
 758   }
 759
 760   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 761
 762   if (ExtraImmOp)
 763     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 764
 765
 766   // Determine whether this is an instruction prefix.
 767   bool isPrefix =
 768     PatchedName == "lock" || PatchedName == "rep" ||
 769     PatchedName == "repne";
 770
 771
 772   // This does the actual operand parsing.  Don't parse any more if we have a
 773   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
 774   // just want to parse the "lock" as the first instruction and the "incl" as
 775   // the next one.
 776   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
 777
 778     // Parse '*' modifier.
 779     if (getLexer().is(AsmToken::Star)) {
 780       SMLoc Loc = Parser.getTok().getLoc();
 781       Operands.push_back(X86Operand::CreateToken("*", Loc));
 782       Parser.Lex(); // Eat the star.
 783     }
 784
 785     // Read the first operand.
 786     if (X86Operand *Op = ParseOperand())
 787       Operands.push_back(Op);
 788     else {
 789       Parser.EatToEndOfStatement();
 790       return true;
 791     }
 792
 793     while (getLexer().is(AsmToken::Comma)) {
 794       Parser.Lex();  // Eat the comma.
 795
 796       // Parse and remember the operand.
 797       if (X86Operand *Op = ParseOperand())
 798         Operands.push_back(Op);
 799       else {
 800         Parser.EatToEndOfStatement();
 801         return true;
 802       }
 803     }
 804
 805     if (getLexer().isNot(AsmToken::EndOfStatement)) {
 806       Parser.EatToEndOfStatement();
 807       return TokError("unexpected token in argument list");
 808     }
 809   }
 810
 811   if (getLexer().is(AsmToken::EndOfStatement))
 812     Parser.Lex(); // Consume the EndOfStatement
 813
 814   // FIXME: Hack to handle recognize s{hr,ar,hl} <op>, $1.  Canonicalize to
 815   // "shift <op>".
 816   if ((Name.startswith("shr") || Name.startswith("sar") ||
 817        Name.startswith("shl")) &&
 818       Operands.size() == 3) {
 819     X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
 820     if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
 821         cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
 822       delete Operands[1];
 823       Operands.erase(Operands.begin() + 1);
 824     }
 825   }
 826
 827   // FIXME: Hack to handle recognize "in[bwl] <op>".  Canonicalize it to
 828   // "inb <op>, %al".
 829   if ((Name == "inb" || Name == "inw" || Name == "inl") &&
 830       Operands.size() == 2) {
 831     unsigned Reg;
 832     if (Name[2] == 'b')
 833       Reg = MatchRegisterName("al");
 834     else if (Name[2] == 'w')
 835       Reg = MatchRegisterName("ax");
 836     else
 837       Reg = MatchRegisterName("eax");
 838     SMLoc Loc = Operands.back()->getEndLoc();
 839     Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
 840   }
 841
 842   // FIXME: Hack to handle recognize "out[bwl] <op>".  Canonicalize it to
 843   // "outb %al, <op>".
 844   if ((Name == "outb" || Name == "outw" || Name == "outl") &&
 845       Operands.size() == 2) {
 846     unsigned Reg;
 847     if (Name[3] == 'b')
 848       Reg = MatchRegisterName("al");
 849     else if (Name[3] == 'w')
 850       Reg = MatchRegisterName("ax");
 851     else
 852       Reg = MatchRegisterName("eax");
 853     SMLoc Loc = Operands.back()->getEndLoc();
 854     Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
 855     std::swap(Operands[1], Operands[2]);
 856   }
 857
 858   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 859   // "f{mul*,add*,sub*,div*} $op"
 860   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 861        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 862       Operands.size() == 3 &&
 863       static_cast<X86Operand*>(Operands[2])->isReg() &&
 864       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 865     delete Operands[2];
 866     Operands.erase(Operands.begin() + 2);
 867   }
 868
 869   // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
 870   // B".
 871   if (Name.startswith("imul") && Operands.size() == 3 &&
 872       static_cast<X86Operand*>(Operands[1])->isImm() &&
 873       static_cast<X86Operand*>(Operands.back())->isReg()) {
 874     X86Operand *Op = static_cast<X86Operand*>(Operands.back());
 875     Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
 876                                              Op->getEndLoc()));
 877   }
 878
 879   // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
 880   // effect (both store to a 16-bit mem).  Force to sldtw to avoid ambiguity
 881   // errors, since its encoding is the most compact.
 882   if (Name == "sldt" && Operands.size() == 2 &&
 883       static_cast<X86Operand*>(Operands[1])->isMem()) {
 884     delete Operands[0];
 885     Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
 886   }
 887
 888   // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
 889   // synonyms.  Our tables only have the "<reg>, <mem>" form, so if we see the
 890   // other operand order, swap them.
 891   if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
 892       Name == "xchg")
 893     if (Operands.size() == 3 &&
 894         static_cast<X86Operand*>(Operands[1])->isMem() &&
 895         static_cast<X86Operand*>(Operands[2])->isReg()) {
 896       std::swap(Operands[1], Operands[2]);
 897     }
 898
 899   // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
 900   // synonyms.  Our tables only have the "<mem>, <reg>" form, so if we see the
 901   // other operand order, swap them.
 902   if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
 903       Name == "test")
 904     if (Operands.size() == 3 &&
 905         static_cast<X86Operand*>(Operands[1])->isReg() &&
 906         static_cast<X86Operand*>(Operands[2])->isMem()) {
 907       std::swap(Operands[1], Operands[2]);
 908     }
 909
 910   return false;
 911 }
 912
 913 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 914   StringRef IDVal = DirectiveID.getIdentifier();
 915   if (IDVal == ".word")
 916     return ParseDirectiveWord(2, DirectiveID.getLoc());
 917   return true;
 918 }
 919
 920 /// ParseDirectiveWord
 921 ///  ::= .word [ expression (, expression)* ]
 922 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 923   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 924     for (;;) {
 925       const MCExpr *Value;
 926       if (getParser().ParseExpression(Value))
 927         return true;
 928
 929       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 930
 931       if (getLexer().is(AsmToken::EndOfStatement))
 932         break;
 933
 934       // FIXME: Improve diagnostic.
 935       if (getLexer().isNot(AsmToken::Comma))
 936         return Error(L, "unexpected token in directive");
 937       Parser.Lex();
 938     }
 939   }
 940
 941   Parser.Lex();
 942   return false;
 943 }
 944
 945
 946 bool
 947 X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
 948                                   const SmallVectorImpl<MCParsedAsmOperand*>
 949                                     &Operands,
 950                                   MCInst &Inst) {
 951   assert(!Operands.empty() && "Unexpect empty operand list!");
 952
 953   bool WasOriginallyInvalidOperand = false;
 954   unsigned OrigErrorInfo;
 955
 956   // First, try a direct match.
 957   switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
 958   case Match_Success:
 959     return false;
 960   case Match_MissingFeature:
 961     Error(IDLoc, "instruction requires a CPU feature not currently enabled");
 962     return true;
 963   case Match_InvalidOperand:
 964     WasOriginallyInvalidOperand = true;
 965     break;
 966   case Match_MnemonicFail:
 967     break;
 968   }
 969
 970   // FIXME: Ideally, we would only attempt suffix matches for things which are
 971   // valid prefixes, and we could just infer the right unambiguous
 972   // type. However, that requires substantially more matcher support than the
 973   // following hack.
 974
 975   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 976   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
 977
 978   // Change the operand to point to a temporary token.
 979   StringRef Base = Op->getToken();
 980   SmallString<16> Tmp;
 981   Tmp += Base;
 982   Tmp += ' ';
 983   Op->setTokenValue(Tmp.str());
 984
 985   // Check for the various suffix matches.
 986   Tmp[Base.size()] = 'b';
 987   unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
 988   MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
 989   Tmp[Base.size()] = 'w';
 990   MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
 991   Tmp[Base.size()] = 'l';
 992   MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
 993   Tmp[Base.size()] = 'q';
 994   MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
 995
 996   // Restore the old token.
 997   Op->setTokenValue(Base);
 998
 999   // If exactly one matched, then we treat that as a successful match (and the
1000   // instruction will already have been filled in correctly, since the failing
1001   // matches won't have modified it).
1002   unsigned NumSuccessfulMatches =
1003     (MatchB == Match_Success) + (MatchW == Match_Success) +
1004     (MatchL == Match_Success) + (MatchQ == Match_Success);
1005   if (NumSuccessfulMatches == 1)
1006     return false;
1007
1008   // Otherwise, the match failed, try to produce a decent error message.
1009
1010   // If we had multiple suffix matches, then identify this as an ambiguous
1011   // match.
1012   if (NumSuccessfulMatches > 1) {
1013     char MatchChars[4];
1014     unsigned NumMatches = 0;
1015     if (MatchB == Match_Success)
1016       MatchChars[NumMatches++] = 'b';
1017     if (MatchW == Match_Success)
1018       MatchChars[NumMatches++] = 'w';
1019     if (MatchL == Match_Success)
1020       MatchChars[NumMatches++] = 'l';
1021     if (MatchQ == Match_Success)
1022       MatchChars[NumMatches++] = 'q';
1023
1024     SmallString<126> Msg;
1025     raw_svector_ostream OS(Msg);
1026     OS << "ambiguous instructions require an explicit suffix (could be ";
1027     for (unsigned i = 0; i != NumMatches; ++i) {
1028       if (i != 0)
1029         OS << ", ";
1030       if (i + 1 == NumMatches)
1031         OS << "or ";
1032       OS << "'" << Base << MatchChars[i] << "'";
1033     }
1034     OS << ")";
1035     Error(IDLoc, OS.str());
1036     return true;
1037   }
1038
1039   // Okay, we know that none of the variants matched successfully.
1040
1041   // If all of the instructions reported an invalid mnemonic, then the original
1042   // mnemonic was invalid.
1043   if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
1044       (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
1045     if (!WasOriginallyInvalidOperand) {
1046       Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
1047       return true;
1048     }
1049
1050     // Recover location info for the operand if we know which was the problem.
1051     SMLoc ErrorLoc = IDLoc;
1052     if (OrigErrorInfo != ~0U) {
1053       ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
1054       if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
1055     }
1056
1057     Error(ErrorLoc, "invalid operand for instruction");
1058     return true;
1059   }
1060
1061   // If one instruction matched with a missing feature, report this as a
1062   // missing feature.
1063   if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
1064       (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
1065     Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1066     return true;
1067   }
1068
1069   // If one instruction matched with an invalid operand, report this as an
1070   // operand failure.
1071   if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
1072       (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
1073     Error(IDLoc, "invalid operand for instruction");
1074     return true;
1075   }
1076
1077   // If all of these were an outright failure, report it in a useless way.
1078   // FIXME: We should give nicer diagnostics about the exact failure.
1079   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
1080   return true;
1081 }
1082
1083
1084 extern "C" void LLVMInitializeX86AsmLexer();
1085
1086 // Force static initialization.
1087 extern "C" void LLVMInitializeX86AsmParser() {
1088   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1089   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1090   LLVMInitializeX86AsmLexer();
1091 }
1092
1093 #define GET_REGISTER_MATCHER
1094 #define GET_MATCHER_IMPLEMENTATION
1095 #include "X86GenAsmMatcher.inc"