lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "X86Subtarget.h"
  13 #include "llvm/ADT/SmallString.h"
  14 #include "llvm/ADT/SmallVector.h"
  15 #include "llvm/ADT/StringSwitch.h"
  16 #include "llvm/ADT/Twine.h"
  17 #include "llvm/MC/MCStreamer.h"
  18 #include "llvm/MC/MCExpr.h"
  19 #include "llvm/MC/MCInst.h"
  20 #include "llvm/MC/MCParser/MCAsmLexer.h"
  21 #include "llvm/MC/MCParser/MCAsmParser.h"
  22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  23 #include "llvm/Support/SourceMgr.h"
  24 #include "llvm/Support/raw_ostream.h"
  25 #include "llvm/Target/TargetRegistry.h"
  26 #include "llvm/Target/TargetAsmParser.h"
  27 using namespace llvm;
  28
  29 namespace {
  30 struct X86Operand;
  31
  32 class X86ATTAsmParser : public TargetAsmParser {
  33   MCAsmParser &Parser;
  34   TargetMachine &TM;
  35
  36 protected:
  37   unsigned Is64Bit : 1;
  38
  39 private:
  40   MCAsmParser &getParser() const { return Parser; }
  41
  42   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  43
  44   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  45
  46   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  47
  48   X86Operand *ParseOperand();
  49   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  50
  51   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  52
  53   bool MatchInstruction(SMLoc IDLoc,
  54                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  55                         MCInst &Inst);
  56
  57   /// @name Auto-generated Matcher Functions
  58   /// {
  59
  60   unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
  61
  62   bool MatchInstructionImpl(
  63     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  64
  65   /// }
  66
  67 public:
  68   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  69     : TargetAsmParser(T), Parser(_Parser), TM(TM) {
  70
  71     // Initialize the set of available features.
  72     setAvailableFeatures(ComputeAvailableFeatures(
  73                            &TM.getSubtarget<X86Subtarget>()));
  74   }
  75
  76   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
  77                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  78
  79   virtual bool ParseDirective(AsmToken DirectiveID);
  80 };
  81
  82 class X86_32ATTAsmParser : public X86ATTAsmParser {
  83 public:
  84   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  85     : X86ATTAsmParser(T, _Parser, TM) {
  86     Is64Bit = false;
  87   }
  88 };
  89
  90 class X86_64ATTAsmParser : public X86ATTAsmParser {
  91 public:
  92   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  93     : X86ATTAsmParser(T, _Parser, TM) {
  94     Is64Bit = true;
  95   }
  96 };
  97
  98 } // end anonymous namespace
  99
 100 /// @name Auto-generated Match Functions
 101 /// {
 102
 103 static unsigned MatchRegisterName(StringRef Name);
 104
 105 /// }
 106
 107 namespace {
 108
 109 /// X86Operand - Instances of this class represent a parsed X86 machine
 110 /// instruction.
 111 struct X86Operand : public MCParsedAsmOperand {
 112   enum KindTy {
 113     Token,
 114     Register,
 115     Immediate,
 116     Memory
 117   } Kind;
 118
 119   SMLoc StartLoc, EndLoc;
 120
 121   union {
 122     struct {
 123       const char *Data;
 124       unsigned Length;
 125     } Tok;
 126
 127     struct {
 128       unsigned RegNo;
 129     } Reg;
 130
 131     struct {
 132       const MCExpr *Val;
 133     } Imm;
 134
 135     struct {
 136       unsigned SegReg;
 137       const MCExpr *Disp;
 138       unsigned BaseReg;
 139       unsigned IndexReg;
 140       unsigned Scale;
 141     } Mem;
 142   };
 143
 144   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 145     : Kind(K), StartLoc(Start), EndLoc(End) {}
 146
 147   /// getStartLoc - Get the location of the first token of this operand.
 148   SMLoc getStartLoc() const { return StartLoc; }
 149   /// getEndLoc - Get the location of the last token of this operand.
 150   SMLoc getEndLoc() const { return EndLoc; }
 151
 152   virtual void dump(raw_ostream &OS) const {}
 153
 154   StringRef getToken() const {
 155     assert(Kind == Token && "Invalid access!");
 156     return StringRef(Tok.Data, Tok.Length);
 157   }
 158   void setTokenValue(StringRef Value) {
 159     assert(Kind == Token && "Invalid access!");
 160     Tok.Data = Value.data();
 161     Tok.Length = Value.size();
 162   }
 163
 164   unsigned getReg() const {
 165     assert(Kind == Register && "Invalid access!");
 166     return Reg.RegNo;
 167   }
 168
 169   const MCExpr *getImm() const {
 170     assert(Kind == Immediate && "Invalid access!");
 171     return Imm.Val;
 172   }
 173
 174   const MCExpr *getMemDisp() const {
 175     assert(Kind == Memory && "Invalid access!");
 176     return Mem.Disp;
 177   }
 178   unsigned getMemSegReg() const {
 179     assert(Kind == Memory && "Invalid access!");
 180     return Mem.SegReg;
 181   }
 182   unsigned getMemBaseReg() const {
 183     assert(Kind == Memory && "Invalid access!");
 184     return Mem.BaseReg;
 185   }
 186   unsigned getMemIndexReg() const {
 187     assert(Kind == Memory && "Invalid access!");
 188     return Mem.IndexReg;
 189   }
 190   unsigned getMemScale() const {
 191     assert(Kind == Memory && "Invalid access!");
 192     return Mem.Scale;
 193   }
 194
 195   bool isToken() const {return Kind == Token; }
 196
 197   bool isImm() const { return Kind == Immediate; }
 198
 199   bool isImmSExti16i8() const {
 200     if (!isImm())
 201       return false;
 202
 203     // If this isn't a constant expr, just assume it fits and let relaxation
 204     // handle it.
 205     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 206     if (!CE)
 207       return true;
 208
 209     // Otherwise, check the value is in a range that makes sense for this
 210     // extension.
 211     uint64_t Value = CE->getValue();
 212     return ((                                  Value <= 0x000000000000007FULL)||
 213             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 214             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 215   }
 216   bool isImmSExti32i8() const {
 217     if (!isImm())
 218       return false;
 219
 220     // If this isn't a constant expr, just assume it fits and let relaxation
 221     // handle it.
 222     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 223     if (!CE)
 224       return true;
 225
 226     // Otherwise, check the value is in a range that makes sense for this
 227     // extension.
 228     uint64_t Value = CE->getValue();
 229     return ((                                  Value <= 0x000000000000007FULL)||
 230             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 231             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 232   }
 233   bool isImmSExti64i8() const {
 234     if (!isImm())
 235       return false;
 236
 237     // If this isn't a constant expr, just assume it fits and let relaxation
 238     // handle it.
 239     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 240     if (!CE)
 241       return true;
 242
 243     // Otherwise, check the value is in a range that makes sense for this
 244     // extension.
 245     uint64_t Value = CE->getValue();
 246     return ((                                  Value <= 0x000000000000007FULL)||
 247             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 248   }
 249   bool isImmSExti64i32() const {
 250     if (!isImm())
 251       return false;
 252
 253     // If this isn't a constant expr, just assume it fits and let relaxation
 254     // handle it.
 255     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 256     if (!CE)
 257       return true;
 258
 259     // Otherwise, check the value is in a range that makes sense for this
 260     // extension.
 261     uint64_t Value = CE->getValue();
 262     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 263             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 264   }
 265
 266   bool isMem() const { return Kind == Memory; }
 267
 268   bool isAbsMem() const {
 269     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 270       !getMemIndexReg() && getMemScale() == 1;
 271   }
 272
 273   bool isReg() const { return Kind == Register; }
 274
 275   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 276     // Add as immediates when possible.
 277     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 278       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 279     else
 280       Inst.addOperand(MCOperand::CreateExpr(Expr));
 281   }
 282
 283   void addRegOperands(MCInst &Inst, unsigned N) const {
 284     assert(N == 1 && "Invalid number of operands!");
 285     Inst.addOperand(MCOperand::CreateReg(getReg()));
 286   }
 287
 288   void addImmOperands(MCInst &Inst, unsigned N) const {
 289     assert(N == 1 && "Invalid number of operands!");
 290     addExpr(Inst, getImm());
 291   }
 292
 293   void addMemOperands(MCInst &Inst, unsigned N) const {
 294     assert((N == 5) && "Invalid number of operands!");
 295     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 296     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 297     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 298     addExpr(Inst, getMemDisp());
 299     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 300   }
 301
 302   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 303     assert((N == 1) && "Invalid number of operands!");
 304     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 305   }
 306
 307   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 308     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 309     Res->Tok.Data = Str.data();
 310     Res->Tok.Length = Str.size();
 311     return Res;
 312   }
 313
 314   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 315     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 316     Res->Reg.RegNo = RegNo;
 317     return Res;
 318   }
 319
 320   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 321     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 322     Res->Imm.Val = Val;
 323     return Res;
 324   }
 325
 326   /// Create an absolute memory operand.
 327   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 328                                SMLoc EndLoc) {
 329     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 330     Res->Mem.SegReg   = 0;
 331     Res->Mem.Disp     = Disp;
 332     Res->Mem.BaseReg  = 0;
 333     Res->Mem.IndexReg = 0;
 334     Res->Mem.Scale    = 1;
 335     return Res;
 336   }
 337
 338   /// Create a generalized memory operand.
 339   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 340                                unsigned BaseReg, unsigned IndexReg,
 341                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 342     // We should never just have a displacement, that should be parsed as an
 343     // absolute memory operand.
 344     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 345
 346     // The scale should always be one of {1,2,4,8}.
 347     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 348            "Invalid scale!");
 349     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 350     Res->Mem.SegReg   = SegReg;
 351     Res->Mem.Disp     = Disp;
 352     Res->Mem.BaseReg  = BaseReg;
 353     Res->Mem.IndexReg = IndexReg;
 354     Res->Mem.Scale    = Scale;
 355     return Res;
 356   }
 357 };
 358
 359 } // end anonymous namespace.
 360
 361
 362 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 363                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 364   RegNo = 0;
 365   const AsmToken &TokPercent = Parser.getTok();
 366   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 367   StartLoc = TokPercent.getLoc();
 368   Parser.Lex(); // Eat percent token.
 369
 370   const AsmToken &Tok = Parser.getTok();
 371   if (Tok.isNot(AsmToken::Identifier))
 372     return Error(Tok.getLoc(), "invalid register name");
 373
 374   // FIXME: Validate register for the current architecture; we have to do
 375   // validation later, so maybe there is no need for this here.
 376   RegNo = MatchRegisterName(Tok.getString());
 377
 378   // FIXME: This should be done using Requires<In32BitMode> and
 379   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
 380   // can be also checked.
 381   if (RegNo == X86::RIZ && !Is64Bit)
 382     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 383
 384   // Parse %st(1) and "%st" as "%st(0)"
 385   if (RegNo == 0 && Tok.getString() == "st") {
 386     RegNo = X86::ST0;
 387     EndLoc = Tok.getLoc();
 388     Parser.Lex(); // Eat 'st'
 389
 390     // Check to see if we have '(4)' after %st.
 391     if (getLexer().isNot(AsmToken::LParen))
 392       return false;
 393     // Lex the paren.
 394     getParser().Lex();
 395
 396     const AsmToken &IntTok = Parser.getTok();
 397     if (IntTok.isNot(AsmToken::Integer))
 398       return Error(IntTok.getLoc(), "expected stack index");
 399     switch (IntTok.getIntVal()) {
 400     case 0: RegNo = X86::ST0; break;
 401     case 1: RegNo = X86::ST1; break;
 402     case 2: RegNo = X86::ST2; break;
 403     case 3: RegNo = X86::ST3; break;
 404     case 4: RegNo = X86::ST4; break;
 405     case 5: RegNo = X86::ST5; break;
 406     case 6: RegNo = X86::ST6; break;
 407     case 7: RegNo = X86::ST7; break;
 408     default: return Error(IntTok.getLoc(), "invalid stack index");
 409     }
 410
 411     if (getParser().Lex().isNot(AsmToken::RParen))
 412       return Error(Parser.getTok().getLoc(), "expected ')'");
 413
 414     EndLoc = Tok.getLoc();
 415     Parser.Lex(); // Eat ')'
 416     return false;
 417   }
 418
 419   // If this is "db[0-7]", match it as an alias
 420   // for dr[0-7].
 421   if (RegNo == 0 && Tok.getString().size() == 3 &&
 422       Tok.getString().startswith("db")) {
 423     switch (Tok.getString()[2]) {
 424     case '0': RegNo = X86::DR0; break;
 425     case '1': RegNo = X86::DR1; break;
 426     case '2': RegNo = X86::DR2; break;
 427     case '3': RegNo = X86::DR3; break;
 428     case '4': RegNo = X86::DR4; break;
 429     case '5': RegNo = X86::DR5; break;
 430     case '6': RegNo = X86::DR6; break;
 431     case '7': RegNo = X86::DR7; break;
 432     }
 433
 434     if (RegNo != 0) {
 435       EndLoc = Tok.getLoc();
 436       Parser.Lex(); // Eat it.
 437       return false;
 438     }
 439   }
 440
 441   if (RegNo == 0)
 442     return Error(Tok.getLoc(), "invalid register name");
 443
 444   EndLoc = Tok.getLoc();
 445   Parser.Lex(); // Eat identifier token.
 446   return false;
 447 }
 448
 449 X86Operand *X86ATTAsmParser::ParseOperand() {
 450   switch (getLexer().getKind()) {
 451   default:
 452     // Parse a memory operand with no segment register.
 453     return ParseMemOperand(0, Parser.getTok().getLoc());
 454   case AsmToken::Percent: {
 455     // Read the register.
 456     unsigned RegNo;
 457     SMLoc Start, End;
 458     if (ParseRegister(RegNo, Start, End)) return 0;
 459     if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
 460       Error(Start, "eiz and riz can only be used as index registers");
 461       return 0;
 462     }
 463
 464     // If this is a segment register followed by a ':', then this is the start
 465     // of a memory reference, otherwise this is a normal register reference.
 466     if (getLexer().isNot(AsmToken::Colon))
 467       return X86Operand::CreateReg(RegNo, Start, End);
 468
 469
 470     getParser().Lex(); // Eat the colon.
 471     return ParseMemOperand(RegNo, Start);
 472   }
 473   case AsmToken::Dollar: {
 474     // $42 -> immediate.
 475     SMLoc Start = Parser.getTok().getLoc(), End;
 476     Parser.Lex();
 477     const MCExpr *Val;
 478     if (getParser().ParseExpression(Val, End))
 479       return 0;
 480     return X86Operand::CreateImm(Val, Start, End);
 481   }
 482   }
 483 }
 484
 485 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 486 /// has already been parsed if present.
 487 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 488
 489   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 490   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 491   // only way to do this without lookahead is to eat the '(' and see what is
 492   // after it.
 493   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 494   if (getLexer().isNot(AsmToken::LParen)) {
 495     SMLoc ExprEnd;
 496     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 497
 498     // After parsing the base expression we could either have a parenthesized
 499     // memory address or not.  If not, return now.  If so, eat the (.
 500     if (getLexer().isNot(AsmToken::LParen)) {
 501       // Unless we have a segment register, treat this as an immediate.
 502       if (SegReg == 0)
 503         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 504       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 505     }
 506
 507     // Eat the '('.
 508     Parser.Lex();
 509   } else {
 510     // Okay, we have a '('.  We don't know if this is an expression or not, but
 511     // so we have to eat the ( to see beyond it.
 512     SMLoc LParenLoc = Parser.getTok().getLoc();
 513     Parser.Lex(); // Eat the '('.
 514
 515     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 516       // Nothing to do here, fall into the code below with the '(' part of the
 517       // memory operand consumed.
 518     } else {
 519       SMLoc ExprEnd;
 520
 521       // It must be an parenthesized expression, parse it now.
 522       if (getParser().ParseParenExpression(Disp, ExprEnd))
 523         return 0;
 524
 525       // After parsing the base expression we could either have a parenthesized
 526       // memory address or not.  If not, return now.  If so, eat the (.
 527       if (getLexer().isNot(AsmToken::LParen)) {
 528         // Unless we have a segment register, treat this as an immediate.
 529         if (SegReg == 0)
 530           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 531         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 532       }
 533
 534       // Eat the '('.
 535       Parser.Lex();
 536     }
 537   }
 538
 539   // If we reached here, then we just ate the ( of the memory operand.  Process
 540   // the rest of the memory operand.
 541   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 542
 543   if (getLexer().is(AsmToken::Percent)) {
 544     SMLoc L;
 545     if (ParseRegister(BaseReg, L, L)) return 0;
 546     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
 547       Error(L, "eiz and riz can only be used as index registers");
 548       return 0;
 549     }
 550   }
 551
 552   if (getLexer().is(AsmToken::Comma)) {
 553     Parser.Lex(); // Eat the comma.
 554
 555     // Following the comma we should have either an index register, or a scale
 556     // value. We don't support the later form, but we want to parse it
 557     // correctly.
 558     //
 559     // Not that even though it would be completely consistent to support syntax
 560     // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
 561     if (getLexer().is(AsmToken::Percent)) {
 562       SMLoc L;
 563       if (ParseRegister(IndexReg, L, L)) return 0;
 564
 565       if (getLexer().isNot(AsmToken::RParen)) {
 566         // Parse the scale amount:
 567         //  ::= ',' [scale-expression]
 568         if (getLexer().isNot(AsmToken::Comma)) {
 569           Error(Parser.getTok().getLoc(),
 570                 "expected comma in scale expression");
 571           return 0;
 572         }
 573         Parser.Lex(); // Eat the comma.
 574
 575         if (getLexer().isNot(AsmToken::RParen)) {
 576           SMLoc Loc = Parser.getTok().getLoc();
 577
 578           int64_t ScaleVal;
 579           if (getParser().ParseAbsoluteExpression(ScaleVal))
 580             return 0;
 581
 582           // Validate the scale amount.
 583           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 584             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 585             return 0;
 586           }
 587           Scale = (unsigned)ScaleVal;
 588         }
 589       }
 590     } else if (getLexer().isNot(AsmToken::RParen)) {
 591       // A scale amount without an index is ignored.
 592       // index.
 593       SMLoc Loc = Parser.getTok().getLoc();
 594
 595       int64_t Value;
 596       if (getParser().ParseAbsoluteExpression(Value))
 597         return 0;
 598
 599       if (Value != 1)
 600         Warning(Loc, "scale factor without index register is ignored");
 601       Scale = 1;
 602     }
 603   }
 604
 605   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 606   if (getLexer().isNot(AsmToken::RParen)) {
 607     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 608     return 0;
 609   }
 610   SMLoc MemEnd = Parser.getTok().getLoc();
 611   Parser.Lex(); // Eat the ')'.
 612
 613   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 614                                MemStart, MemEnd);
 615 }
 616
 617 bool X86ATTAsmParser::
 618 ParseInstruction(StringRef Name, SMLoc NameLoc,
 619                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 620   // The various flavors of pushf and popf use Requires<In32BitMode> and
 621   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 622   // For now, just do a manual check to prevent silent misencoding.
 623   if (Is64Bit) {
 624     if (Name == "popfl")
 625       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 626     else if (Name == "pushfl")
 627       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 628     else if (Name == "pusha")
 629       return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
 630   } else {
 631     if (Name == "popfq")
 632       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 633     else if (Name == "pushfq")
 634       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 635   }
 636
 637   // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
 638   // the form jrcxz is not allowed in 32-bit mode.
 639   if (Is64Bit) {
 640     if (Name == "jcxz")
 641       return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
 642   } else {
 643     if (Name == "jrcxz")
 644       return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
 645   }
 646
 647   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 648   // represent alternative syntaxes in the .td file, without requiring
 649   // instruction duplication.
 650   StringRef PatchedName = StringSwitch<StringRef>(Name)
 651     .Case("sal", "shl")
 652     .Case("salb", "shlb")
 653     .Case("sall", "shll")
 654     .Case("salq", "shlq")
 655     .Case("salw", "shlw")
 656     .Case("repe", "rep")
 657     .Case("repz", "rep")
 658     .Case("repnz", "repne")
 659     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 660     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 661     .Case("retl", Is64Bit ? "retl" : "ret")
 662     .Case("retq", Is64Bit ? "ret" : "retq")
 663     .Case("setz", "sete")
 664     .Case("setnz", "setne")
 665     .Case("jz", "je")
 666     .Case("jnz", "jne")
 667     .Case("jc", "jb")
 668     // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
 669     // jecxz requires an AdSize prefix but jecxz does not have a prefix in
 670     // 32-bit mode.
 671     .Case("jecxz", "jcxz")
 672     .Case("jrcxz", "jcxz")
 673     .Case("jna", "jbe")
 674     .Case("jnae", "jb")
 675     .Case("jnb", "jae")
 676     .Case("jnbe", "ja")
 677     .Case("jnc", "jae")
 678     .Case("jng", "jle")
 679     .Case("jnge", "jl")
 680     .Case("jnl", "jge")
 681     .Case("jnle", "jg")
 682     .Case("jpe", "jp")
 683     .Case("jpo", "jnp")
 684     .Case("cmovcl", "cmovbl")
 685     .Case("cmovcl", "cmovbl")
 686     .Case("cmovnal", "cmovbel")
 687     .Case("cmovnbl", "cmovael")
 688     .Case("cmovnbel", "cmoval")
 689     .Case("cmovncl", "cmovael")
 690     .Case("cmovngl", "cmovlel")
 691     .Case("cmovnl", "cmovgel")
 692     .Case("cmovngl", "cmovlel")
 693     .Case("cmovngel", "cmovll")
 694     .Case("cmovnll", "cmovgel")
 695     .Case("cmovnlel", "cmovgl")
 696     .Case("cmovnzl", "cmovnel")
 697     .Case("cmovzl", "cmovel")
 698     .Case("fwait", "wait")
 699     .Case("movzx", "movzb")
 700     .Default(Name);
 701
 702   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 703   const MCExpr *ExtraImmOp = 0;
 704   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 705       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 706        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 707     bool IsVCMP = PatchedName.startswith("vcmp");
 708     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 709     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 710       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 711       .Case("eq",          0)
 712       .Case("lt",          1)
 713       .Case("le",          2)
 714       .Case("unord",       3)
 715       .Case("neq",         4)
 716       .Case("nlt",         5)
 717       .Case("nle",         6)
 718       .Case("ord",         7)
 719       .Case("eq_uq",       8)
 720       .Case("nge",         9)
 721       .Case("ngt",      0x0A)
 722       .Case("false",    0x0B)
 723       .Case("neq_oq",   0x0C)
 724       .Case("ge",       0x0D)
 725       .Case("gt",       0x0E)
 726       .Case("true",     0x0F)
 727       .Case("eq_os",    0x10)
 728       .Case("lt_oq",    0x11)
 729       .Case("le_oq",    0x12)
 730       .Case("unord_s",  0x13)
 731       .Case("neq_us",   0x14)
 732       .Case("nlt_uq",   0x15)
 733       .Case("nle_uq",   0x16)
 734       .Case("ord_s",    0x17)
 735       .Case("eq_us",    0x18)
 736       .Case("nge_uq",   0x19)
 737       .Case("ngt_uq",   0x1A)
 738       .Case("false_os", 0x1B)
 739       .Case("neq_os",   0x1C)
 740       .Case("ge_oq",    0x1D)
 741       .Case("gt_oq",    0x1E)
 742       .Case("true_us",  0x1F)
 743       .Default(~0U);
 744     if (SSEComparisonCode != ~0U) {
 745       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 746                                           getParser().getContext());
 747       if (PatchedName.endswith("ss")) {
 748         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 749       } else if (PatchedName.endswith("sd")) {
 750         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 751       } else if (PatchedName.endswith("ps")) {
 752         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 753       } else {
 754         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 755         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 756       }
 757     }
 758   }
 759
 760   // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
 761   if (PatchedName.startswith("vpclmul")) {
 762     unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
 763       PatchedName.slice(7, PatchedName.size() - 2))
 764       .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
 765       .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
 766       .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
 767       .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
 768       .Default(~0U);
 769     if (CLMULQuadWordSelect != ~0U) {
 770       ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
 771                                           getParser().getContext());
 772       assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
 773       PatchedName = "vpclmulqdq";
 774     }
 775   }
 776   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 777
 778   if (ExtraImmOp)
 779     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 780
 781   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 782
 783     // Parse '*' modifier.
 784     if (getLexer().is(AsmToken::Star)) {
 785       SMLoc Loc = Parser.getTok().getLoc();
 786       Operands.push_back(X86Operand::CreateToken("*", Loc));
 787       Parser.Lex(); // Eat the star.
 788     }
 789
 790     // Read the first operand.
 791     if (X86Operand *Op = ParseOperand())
 792       Operands.push_back(Op);
 793     else
 794       return true;
 795
 796     while (getLexer().is(AsmToken::Comma)) {
 797       Parser.Lex();  // Eat the comma.
 798
 799       // Parse and remember the operand.
 800       if (X86Operand *Op = ParseOperand())
 801         Operands.push_back(Op);
 802       else
 803         return true;
 804     }
 805   }
 806
 807   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 808   if ((Name.startswith("shr") || Name.startswith("sar") ||
 809        Name.startswith("shl")) &&
 810       Operands.size() == 3 &&
 811       static_cast<X86Operand*>(Operands[1])->isImm() &&
 812       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 813       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 814     delete Operands[1];
 815     Operands.erase(Operands.begin() + 1);
 816   }
 817
 818   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 819   // "f{mul*,add*,sub*,div*} $op"
 820   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 821        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 822       Operands.size() == 3 &&
 823       static_cast<X86Operand*>(Operands[2])->isReg() &&
 824       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 825     delete Operands[2];
 826     Operands.erase(Operands.begin() + 2);
 827   }
 828
 829   // FIXME: Hack to handle "imul A, B" which is an alias for "imul A, B, B".
 830   if (Name.startswith("imul") && Operands.size() == 3 &&
 831       static_cast<X86Operand*>(Operands.back())->isReg()) {
 832     X86Operand *Op = static_cast<X86Operand*>(Operands.back());
 833     Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
 834                                              Op->getEndLoc()));
 835   }
 836
 837   return false;
 838 }
 839
 840 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 841   StringRef IDVal = DirectiveID.getIdentifier();
 842   if (IDVal == ".word")
 843     return ParseDirectiveWord(2, DirectiveID.getLoc());
 844   return true;
 845 }
 846
 847 /// ParseDirectiveWord
 848 ///  ::= .word [ expression (, expression)* ]
 849 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 850   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 851     for (;;) {
 852       const MCExpr *Value;
 853       if (getParser().ParseExpression(Value))
 854         return true;
 855
 856       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 857
 858       if (getLexer().is(AsmToken::EndOfStatement))
 859         break;
 860
 861       // FIXME: Improve diagnostic.
 862       if (getLexer().isNot(AsmToken::Comma))
 863         return Error(L, "unexpected token in directive");
 864       Parser.Lex();
 865     }
 866   }
 867
 868   Parser.Lex();
 869   return false;
 870 }
 871
 872
 873 bool
 874 X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
 875                                   const SmallVectorImpl<MCParsedAsmOperand*>
 876                                     &Operands,
 877                                   MCInst &Inst) {
 878   assert(!Operands.empty() && "Unexpect empty operand list!");
 879
 880   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 881   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
 882
 883   // First, try a direct match.
 884   if (!MatchInstructionImpl(Operands, Inst))
 885     return false;
 886
 887   // FIXME: Ideally, we would only attempt suffix matches for things which are
 888   // valid prefixes, and we could just infer the right unambiguous
 889   // type. However, that requires substantially more matcher support than the
 890   // following hack.
 891
 892   // Change the operand to point to a temporary token.
 893   StringRef Base = Op->getToken();
 894   SmallString<16> Tmp;
 895   Tmp += Base;
 896   Tmp += ' ';
 897   Op->setTokenValue(Tmp.str());
 898
 899   // Check for the various suffix matches.
 900   Tmp[Base.size()] = 'b';
 901   bool MatchB = MatchInstructionImpl(Operands, Inst);
 902   Tmp[Base.size()] = 'w';
 903   bool MatchW = MatchInstructionImpl(Operands, Inst);
 904   Tmp[Base.size()] = 'l';
 905   bool MatchL = MatchInstructionImpl(Operands, Inst);
 906   Tmp[Base.size()] = 'q';
 907   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 908
 909   // Restore the old token.
 910   Op->setTokenValue(Base);
 911
 912   // If exactly one matched, then we treat that as a successful match (and the
 913   // instruction will already have been filled in correctly, since the failing
 914   // matches won't have modified it).
 915   if (MatchB + MatchW + MatchL + MatchQ == 3)
 916     return false;
 917
 918   // Otherwise, the match failed.
 919
 920   // If we had multiple suffix matches, then identify this as an ambiguous
 921   // match.
 922   if (MatchB + MatchW + MatchL + MatchQ != 4) {
 923     char MatchChars[4];
 924     unsigned NumMatches = 0;
 925     if (!MatchB)
 926       MatchChars[NumMatches++] = 'b';
 927     if (!MatchW)
 928       MatchChars[NumMatches++] = 'w';
 929     if (!MatchL)
 930       MatchChars[NumMatches++] = 'l';
 931     if (!MatchQ)
 932       MatchChars[NumMatches++] = 'q';
 933
 934     SmallString<126> Msg;
 935     raw_svector_ostream OS(Msg);
 936     OS << "ambiguous instructions require an explicit suffix (could be ";
 937     for (unsigned i = 0; i != NumMatches; ++i) {
 938       if (i != 0)
 939         OS << ", ";
 940       if (i + 1 == NumMatches)
 941         OS << "or ";
 942       OS << "'" << Base << MatchChars[i] << "'";
 943     }
 944     OS << ")";
 945     Error(IDLoc, OS.str());
 946   } else {
 947     // FIXME: We should give nicer diagnostics about the exact failure.
 948     Error(IDLoc, "unrecognized instruction");
 949   }
 950
 951   return true;
 952 }
 953
 954
 955 extern "C" void LLVMInitializeX86AsmLexer();
 956
 957 // Force static initialization.
 958 extern "C" void LLVMInitializeX86AsmParser() {
 959   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 960   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 961   LLVMInitializeX86AsmLexer();
 962 }
 963
 964 #include "X86GenAsmMatcher.inc"