lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "llvm/ADT/SmallVector.h"
  13 #include "llvm/ADT/StringSwitch.h"
  14 #include "llvm/ADT/Twine.h"
  15 #include "llvm/MC/MCStreamer.h"
  16 #include "llvm/MC/MCExpr.h"
  17 #include "llvm/MC/MCInst.h"
  18 #include "llvm/MC/MCParser/MCAsmLexer.h"
  19 #include "llvm/MC/MCParser/MCAsmParser.h"
  20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  21 #include "llvm/Support/SourceMgr.h"
  22 #include "llvm/Target/TargetRegistry.h"
  23 #include "llvm/Target/TargetAsmParser.h"
  24 using namespace llvm;
  25
  26 namespace {
  27 struct X86Operand;
  28
  29 class X86ATTAsmParser : public TargetAsmParser {
  30   MCAsmParser &Parser;
  31
  32 protected:
  33   unsigned Is64Bit : 1;
  34
  35 private:
  36   MCAsmParser &getParser() const { return Parser; }
  37
  38   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  39
  40   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  41
  42   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  43
  44   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  45
  46   X86Operand *ParseOperand();
  47   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  48
  49   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  50
  51   void InstructionCleanup(MCInst &Inst);
  52
  53   /// @name Auto-generated Match Functions
  54   /// {
  55
  56   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  57                         MCInst &Inst);
  58
  59   bool MatchInstructionImpl(
  60     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  61
  62   /// }
  63
  64 public:
  65   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  66     : TargetAsmParser(T), Parser(_Parser) {}
  67
  68   virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
  69                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  70
  71   virtual bool ParseDirective(AsmToken DirectiveID);
  72 };
  73
  74 class X86_32ATTAsmParser : public X86ATTAsmParser {
  75 public:
  76   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  77     : X86ATTAsmParser(T, _Parser) {
  78     Is64Bit = false;
  79   }
  80 };
  81
  82 class X86_64ATTAsmParser : public X86ATTAsmParser {
  83 public:
  84   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  85     : X86ATTAsmParser(T, _Parser) {
  86     Is64Bit = true;
  87   }
  88 };
  89
  90 } // end anonymous namespace
  91
  92 /// @name Auto-generated Match Functions
  93 /// {
  94
  95 static unsigned MatchRegisterName(StringRef Name);
  96
  97 /// }
  98
  99 namespace {
 100
 101 /// X86Operand - Instances of this class represent a parsed X86 machine
 102 /// instruction.
 103 struct X86Operand : public MCParsedAsmOperand {
 104   enum KindTy {
 105     Token,
 106     Register,
 107     Immediate,
 108     Memory
 109   } Kind;
 110
 111   SMLoc StartLoc, EndLoc;
 112
 113   union {
 114     struct {
 115       const char *Data;
 116       unsigned Length;
 117     } Tok;
 118
 119     struct {
 120       unsigned RegNo;
 121     } Reg;
 122
 123     struct {
 124       const MCExpr *Val;
 125     } Imm;
 126
 127     struct {
 128       unsigned SegReg;
 129       const MCExpr *Disp;
 130       unsigned BaseReg;
 131       unsigned IndexReg;
 132       unsigned Scale;
 133     } Mem;
 134   };
 135
 136   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 137     : Kind(K), StartLoc(Start), EndLoc(End) {}
 138
 139   /// getStartLoc - Get the location of the first token of this operand.
 140   SMLoc getStartLoc() const { return StartLoc; }
 141   /// getEndLoc - Get the location of the last token of this operand.
 142   SMLoc getEndLoc() const { return EndLoc; }
 143
 144   StringRef getToken() const {
 145     assert(Kind == Token && "Invalid access!");
 146     return StringRef(Tok.Data, Tok.Length);
 147   }
 148   void setTokenValue(StringRef Value) {
 149     assert(Kind == Token && "Invalid access!");
 150     Tok.Data = Value.data();
 151     Tok.Length = Value.size();
 152   }
 153
 154   unsigned getReg() const {
 155     assert(Kind == Register && "Invalid access!");
 156     return Reg.RegNo;
 157   }
 158
 159   const MCExpr *getImm() const {
 160     assert(Kind == Immediate && "Invalid access!");
 161     return Imm.Val;
 162   }
 163
 164   const MCExpr *getMemDisp() const {
 165     assert(Kind == Memory && "Invalid access!");
 166     return Mem.Disp;
 167   }
 168   unsigned getMemSegReg() const {
 169     assert(Kind == Memory && "Invalid access!");
 170     return Mem.SegReg;
 171   }
 172   unsigned getMemBaseReg() const {
 173     assert(Kind == Memory && "Invalid access!");
 174     return Mem.BaseReg;
 175   }
 176   unsigned getMemIndexReg() const {
 177     assert(Kind == Memory && "Invalid access!");
 178     return Mem.IndexReg;
 179   }
 180   unsigned getMemScale() const {
 181     assert(Kind == Memory && "Invalid access!");
 182     return Mem.Scale;
 183   }
 184
 185   bool isToken() const {return Kind == Token; }
 186
 187   bool isImm() const { return Kind == Immediate; }
 188
 189   bool isImmSExti16i8() const {
 190     if (!isImm())
 191       return false;
 192
 193     // If this isn't a constant expr, just assume it fits and let relaxation
 194     // handle it.
 195     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 196     if (!CE)
 197       return true;
 198
 199     // Otherwise, check the value is in a range that makes sense for this
 200     // extension.
 201     uint64_t Value = CE->getValue();
 202     return ((                                  Value <= 0x000000000000007FULL)||
 203             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 204             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 205   }
 206   bool isImmSExti32i8() const {
 207     if (!isImm())
 208       return false;
 209
 210     // If this isn't a constant expr, just assume it fits and let relaxation
 211     // handle it.
 212     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 213     if (!CE)
 214       return true;
 215
 216     // Otherwise, check the value is in a range that makes sense for this
 217     // extension.
 218     uint64_t Value = CE->getValue();
 219     return ((                                  Value <= 0x000000000000007FULL)||
 220             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 221             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 222   }
 223   bool isImmSExti64i8() const {
 224     if (!isImm())
 225       return false;
 226
 227     // If this isn't a constant expr, just assume it fits and let relaxation
 228     // handle it.
 229     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 230     if (!CE)
 231       return true;
 232
 233     // Otherwise, check the value is in a range that makes sense for this
 234     // extension.
 235     uint64_t Value = CE->getValue();
 236     return ((                                  Value <= 0x000000000000007FULL)||
 237             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 238   }
 239   bool isImmSExti64i32() const {
 240     if (!isImm())
 241       return false;
 242
 243     // If this isn't a constant expr, just assume it fits and let relaxation
 244     // handle it.
 245     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 246     if (!CE)
 247       return true;
 248
 249     // Otherwise, check the value is in a range that makes sense for this
 250     // extension.
 251     uint64_t Value = CE->getValue();
 252     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 253             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 254   }
 255
 256   bool isMem() const { return Kind == Memory; }
 257
 258   bool isAbsMem() const {
 259     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 260       !getMemIndexReg() && getMemScale() == 1;
 261   }
 262
 263   bool isNoSegMem() const {
 264     return Kind == Memory && !getMemSegReg();
 265   }
 266
 267   bool isReg() const { return Kind == Register; }
 268
 269   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 270     // Add as immediates when possible.
 271     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 272       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 273     else
 274       Inst.addOperand(MCOperand::CreateExpr(Expr));
 275   }
 276
 277   void addRegOperands(MCInst &Inst, unsigned N) const {
 278     assert(N == 1 && "Invalid number of operands!");
 279     Inst.addOperand(MCOperand::CreateReg(getReg()));
 280   }
 281
 282   void addImmOperands(MCInst &Inst, unsigned N) const {
 283     assert(N == 1 && "Invalid number of operands!");
 284     addExpr(Inst, getImm());
 285   }
 286
 287   void addMemOperands(MCInst &Inst, unsigned N) const {
 288     assert((N == 5) && "Invalid number of operands!");
 289     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 290     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 291     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 292     addExpr(Inst, getMemDisp());
 293     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 294   }
 295
 296   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 297     assert((N == 1) && "Invalid number of operands!");
 298     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 299   }
 300
 301   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 302     assert((N == 4) && "Invalid number of operands!");
 303     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 304     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 305     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 306     addExpr(Inst, getMemDisp());
 307   }
 308
 309   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 310     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 311     Res->Tok.Data = Str.data();
 312     Res->Tok.Length = Str.size();
 313     return Res;
 314   }
 315
 316   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 317     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 318     Res->Reg.RegNo = RegNo;
 319     return Res;
 320   }
 321
 322   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 323     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 324     Res->Imm.Val = Val;
 325     return Res;
 326   }
 327
 328   /// Create an absolute memory operand.
 329   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 330                                SMLoc EndLoc) {
 331     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 332     Res->Mem.SegReg   = 0;
 333     Res->Mem.Disp     = Disp;
 334     Res->Mem.BaseReg  = 0;
 335     Res->Mem.IndexReg = 0;
 336     Res->Mem.Scale    = 1;
 337     return Res;
 338   }
 339
 340   /// Create a generalized memory operand.
 341   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 342                                unsigned BaseReg, unsigned IndexReg,
 343                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 344     // We should never just have a displacement, that should be parsed as an
 345     // absolute memory operand.
 346     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 347
 348     // The scale should always be one of {1,2,4,8}.
 349     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 350            "Invalid scale!");
 351     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 352     Res->Mem.SegReg   = SegReg;
 353     Res->Mem.Disp     = Disp;
 354     Res->Mem.BaseReg  = BaseReg;
 355     Res->Mem.IndexReg = IndexReg;
 356     Res->Mem.Scale    = Scale;
 357     return Res;
 358   }
 359 };
 360
 361 } // end anonymous namespace.
 362
 363
 364 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 365                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 366   RegNo = 0;
 367   const AsmToken &TokPercent = Parser.getTok();
 368   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 369   StartLoc = TokPercent.getLoc();
 370   Parser.Lex(); // Eat percent token.
 371
 372   const AsmToken &Tok = Parser.getTok();
 373   if (Tok.isNot(AsmToken::Identifier))
 374     return Error(Tok.getLoc(), "invalid register name");
 375
 376   // FIXME: Validate register for the current architecture; we have to do
 377   // validation later, so maybe there is no need for this here.
 378   RegNo = MatchRegisterName(Tok.getString());
 379
 380   // Parse %st(1) and "%st" as "%st(0)"
 381   if (RegNo == 0 && Tok.getString() == "st") {
 382     RegNo = X86::ST0;
 383     EndLoc = Tok.getLoc();
 384     Parser.Lex(); // Eat 'st'
 385
 386     // Check to see if we have '(4)' after %st.
 387     if (getLexer().isNot(AsmToken::LParen))
 388       return false;
 389     // Lex the paren.
 390     getParser().Lex();
 391
 392     const AsmToken &IntTok = Parser.getTok();
 393     if (IntTok.isNot(AsmToken::Integer))
 394       return Error(IntTok.getLoc(), "expected stack index");
 395     switch (IntTok.getIntVal()) {
 396     case 0: RegNo = X86::ST0; break;
 397     case 1: RegNo = X86::ST1; break;
 398     case 2: RegNo = X86::ST2; break;
 399     case 3: RegNo = X86::ST3; break;
 400     case 4: RegNo = X86::ST4; break;
 401     case 5: RegNo = X86::ST5; break;
 402     case 6: RegNo = X86::ST6; break;
 403     case 7: RegNo = X86::ST7; break;
 404     default: return Error(IntTok.getLoc(), "invalid stack index");
 405     }
 406
 407     if (getParser().Lex().isNot(AsmToken::RParen))
 408       return Error(Parser.getTok().getLoc(), "expected ')'");
 409
 410     EndLoc = Tok.getLoc();
 411     Parser.Lex(); // Eat ')'
 412     return false;
 413   }
 414
 415   if (RegNo == 0)
 416     return Error(Tok.getLoc(), "invalid register name");
 417
 418   EndLoc = Tok.getLoc();
 419   Parser.Lex(); // Eat identifier token.
 420   return false;
 421 }
 422
 423 X86Operand *X86ATTAsmParser::ParseOperand() {
 424   switch (getLexer().getKind()) {
 425   default:
 426     // Parse a memory operand with no segment register.
 427     return ParseMemOperand(0, Parser.getTok().getLoc());
 428   case AsmToken::Percent: {
 429     // Read the register.
 430     unsigned RegNo;
 431     SMLoc Start, End;
 432     if (ParseRegister(RegNo, Start, End)) return 0;
 433
 434     // If this is a segment register followed by a ':', then this is the start
 435     // of a memory reference, otherwise this is a normal register reference.
 436     if (getLexer().isNot(AsmToken::Colon))
 437       return X86Operand::CreateReg(RegNo, Start, End);
 438
 439
 440     getParser().Lex(); // Eat the colon.
 441     return ParseMemOperand(RegNo, Start);
 442   }
 443   case AsmToken::Dollar: {
 444     // $42 -> immediate.
 445     SMLoc Start = Parser.getTok().getLoc(), End;
 446     Parser.Lex();
 447     const MCExpr *Val;
 448     if (getParser().ParseExpression(Val, End))
 449       return 0;
 450     return X86Operand::CreateImm(Val, Start, End);
 451   }
 452   }
 453 }
 454
 455 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 456 /// has already been parsed if present.
 457 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 458
 459   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 460   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 461   // only way to do this without lookahead is to eat the '(' and see what is
 462   // after it.
 463   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 464   if (getLexer().isNot(AsmToken::LParen)) {
 465     SMLoc ExprEnd;
 466     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 467
 468     // After parsing the base expression we could either have a parenthesized
 469     // memory address or not.  If not, return now.  If so, eat the (.
 470     if (getLexer().isNot(AsmToken::LParen)) {
 471       // Unless we have a segment register, treat this as an immediate.
 472       if (SegReg == 0)
 473         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 474       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 475     }
 476
 477     // Eat the '('.
 478     Parser.Lex();
 479   } else {
 480     // Okay, we have a '('.  We don't know if this is an expression or not, but
 481     // so we have to eat the ( to see beyond it.
 482     SMLoc LParenLoc = Parser.getTok().getLoc();
 483     Parser.Lex(); // Eat the '('.
 484
 485     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 486       // Nothing to do here, fall into the code below with the '(' part of the
 487       // memory operand consumed.
 488     } else {
 489       SMLoc ExprEnd;
 490
 491       // It must be an parenthesized expression, parse it now.
 492       if (getParser().ParseParenExpression(Disp, ExprEnd))
 493         return 0;
 494
 495       // After parsing the base expression we could either have a parenthesized
 496       // memory address or not.  If not, return now.  If so, eat the (.
 497       if (getLexer().isNot(AsmToken::LParen)) {
 498         // Unless we have a segment register, treat this as an immediate.
 499         if (SegReg == 0)
 500           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 501         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 502       }
 503
 504       // Eat the '('.
 505       Parser.Lex();
 506     }
 507   }
 508
 509   // If we reached here, then we just ate the ( of the memory operand.  Process
 510   // the rest of the memory operand.
 511   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 512
 513   if (getLexer().is(AsmToken::Percent)) {
 514     SMLoc L;
 515     if (ParseRegister(BaseReg, L, L)) return 0;
 516   }
 517
 518   if (getLexer().is(AsmToken::Comma)) {
 519     Parser.Lex(); // Eat the comma.
 520
 521     // Following the comma we should have either an index register, or a scale
 522     // value. We don't support the later form, but we want to parse it
 523     // correctly.
 524     //
 525     // Not that even though it would be completely consistent to support syntax
 526     // like "1(%eax,,1)", the assembler doesn't.
 527     if (getLexer().is(AsmToken::Percent)) {
 528       SMLoc L;
 529       if (ParseRegister(IndexReg, L, L)) return 0;
 530
 531       if (getLexer().isNot(AsmToken::RParen)) {
 532         // Parse the scale amount:
 533         //  ::= ',' [scale-expression]
 534         if (getLexer().isNot(AsmToken::Comma)) {
 535           Error(Parser.getTok().getLoc(),
 536                 "expected comma in scale expression");
 537           return 0;
 538         }
 539         Parser.Lex(); // Eat the comma.
 540
 541         if (getLexer().isNot(AsmToken::RParen)) {
 542           SMLoc Loc = Parser.getTok().getLoc();
 543
 544           int64_t ScaleVal;
 545           if (getParser().ParseAbsoluteExpression(ScaleVal))
 546             return 0;
 547
 548           // Validate the scale amount.
 549           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 550             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 551             return 0;
 552           }
 553           Scale = (unsigned)ScaleVal;
 554         }
 555       }
 556     } else if (getLexer().isNot(AsmToken::RParen)) {
 557       // Otherwise we have the unsupported form of a scale amount without an
 558       // index.
 559       SMLoc Loc = Parser.getTok().getLoc();
 560
 561       int64_t Value;
 562       if (getParser().ParseAbsoluteExpression(Value))
 563         return 0;
 564
 565       Error(Loc, "cannot have scale factor without index register");
 566       return 0;
 567     }
 568   }
 569
 570   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 571   if (getLexer().isNot(AsmToken::RParen)) {
 572     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 573     return 0;
 574   }
 575   SMLoc MemEnd = Parser.getTok().getLoc();
 576   Parser.Lex(); // Eat the ')'.
 577
 578   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 579                                MemStart, MemEnd);
 580 }
 581
 582 bool X86ATTAsmParser::
 583 ParseInstruction(const StringRef &Name, SMLoc NameLoc,
 584                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 585   // The various flavors of pushf and popf use Requires<In32BitMode> and
 586   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 587   // For now, just do a manual check to prevent silent misencoding.
 588   if (Is64Bit) {
 589     if (Name == "popfl")
 590       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 591     else if (Name == "pushfl")
 592       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 593   } else {
 594     if (Name == "popfq")
 595       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 596     else if (Name == "pushfq")
 597       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 598   }
 599
 600   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 601   // represent alternative syntaxes in the .td file, without requiring
 602   // instruction duplication.
 603   StringRef PatchedName = StringSwitch<StringRef>(Name)
 604     .Case("sal", "shl")
 605     .Case("salb", "shlb")
 606     .Case("sall", "shll")
 607     .Case("salq", "shlq")
 608     .Case("salw", "shlw")
 609     .Case("repe", "rep")
 610     .Case("repz", "rep")
 611     .Case("repnz", "repne")
 612     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 613     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 614     .Case("retl", Is64Bit ? "retl" : "ret")
 615     .Case("retq", Is64Bit ? "ret" : "retq")
 616     .Case("setz", "sete")
 617     .Case("setnz", "setne")
 618     .Case("jz", "je")
 619     .Case("jnz", "jne")
 620     .Case("jc", "jb")
 621     .Case("jecxz", "jcxz")
 622     .Case("jna", "jbe")
 623     .Case("jnae", "jb")
 624     .Case("jnb", "jae")
 625     .Case("jnbe", "ja")
 626     .Case("jnc", "jae")
 627     .Case("jng", "jle")
 628     .Case("jnge", "jl")
 629     .Case("jnl", "jge")
 630     .Case("jnle", "jg")
 631     .Case("jpe", "jp")
 632     .Case("jpo", "jnp")
 633     .Case("cmovcl", "cmovbl")
 634     .Case("cmovcl", "cmovbl")
 635     .Case("cmovnal", "cmovbel")
 636     .Case("cmovnbl", "cmovael")
 637     .Case("cmovnbel", "cmoval")
 638     .Case("cmovncl", "cmovael")
 639     .Case("cmovngl", "cmovlel")
 640     .Case("cmovnl", "cmovgel")
 641     .Case("cmovngl", "cmovlel")
 642     .Case("cmovngel", "cmovll")
 643     .Case("cmovnll", "cmovgel")
 644     .Case("cmovnlel", "cmovgl")
 645     .Case("cmovnzl", "cmovnel")
 646     .Case("cmovzl", "cmovel")
 647     .Case("fwait", "wait")
 648     .Case("movzx", "movzb")
 649     .Default(Name);
 650
 651   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 652   const MCExpr *ExtraImmOp = 0;
 653   if (PatchedName.startswith("cmp") &&
 654       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 655        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 656     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 657       PatchedName.slice(3, PatchedName.size() - 2))
 658       .Case("eq", 0)
 659       .Case("lt", 1)
 660       .Case("le", 2)
 661       .Case("unord", 3)
 662       .Case("neq", 4)
 663       .Case("nlt", 5)
 664       .Case("nle", 6)
 665       .Case("ord", 7)
 666       .Default(~0U);
 667     if (SSEComparisonCode != ~0U) {
 668       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 669                                           getParser().getContext());
 670       if (PatchedName.endswith("ss")) {
 671         PatchedName = "cmpss";
 672       } else if (PatchedName.endswith("sd")) {
 673         PatchedName = "cmpsd";
 674       } else if (PatchedName.endswith("ps")) {
 675         PatchedName = "cmpps";
 676       } else {
 677         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 678         PatchedName = "cmppd";
 679       }
 680     }
 681   }
 682   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 683
 684   if (ExtraImmOp)
 685     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 686
 687   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 688
 689     // Parse '*' modifier.
 690     if (getLexer().is(AsmToken::Star)) {
 691       SMLoc Loc = Parser.getTok().getLoc();
 692       Operands.push_back(X86Operand::CreateToken("*", Loc));
 693       Parser.Lex(); // Eat the star.
 694     }
 695
 696     // Read the first operand.
 697     if (X86Operand *Op = ParseOperand())
 698       Operands.push_back(Op);
 699     else
 700       return true;
 701
 702     while (getLexer().is(AsmToken::Comma)) {
 703       Parser.Lex();  // Eat the comma.
 704
 705       // Parse and remember the operand.
 706       if (X86Operand *Op = ParseOperand())
 707         Operands.push_back(Op);
 708       else
 709         return true;
 710     }
 711   }
 712
 713   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 714   if ((Name.startswith("shr") || Name.startswith("sar") ||
 715        Name.startswith("shl")) &&
 716       Operands.size() == 3 &&
 717       static_cast<X86Operand*>(Operands[1])->isImm() &&
 718       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 719       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 720     delete Operands[1];
 721     Operands.erase(Operands.begin() + 1);
 722   }
 723
 724   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 725   // "f{mul*,add*,sub*,div*} $op"
 726   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 727        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 728       Operands.size() == 3 &&
 729       static_cast<X86Operand*>(Operands[2])->isReg() &&
 730       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 731     delete Operands[2];
 732     Operands.erase(Operands.begin() + 2);
 733   }
 734
 735   return false;
 736 }
 737
 738 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 739   StringRef IDVal = DirectiveID.getIdentifier();
 740   if (IDVal == ".word")
 741     return ParseDirectiveWord(2, DirectiveID.getLoc());
 742   return true;
 743 }
 744
 745 /// ParseDirectiveWord
 746 ///  ::= .word [ expression (, expression)* ]
 747 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 748   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 749     for (;;) {
 750       const MCExpr *Value;
 751       if (getParser().ParseExpression(Value))
 752         return true;
 753
 754       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 755
 756       if (getLexer().is(AsmToken::EndOfStatement))
 757         break;
 758
 759       // FIXME: Improve diagnostic.
 760       if (getLexer().isNot(AsmToken::Comma))
 761         return Error(L, "unexpected token in directive");
 762       Parser.Lex();
 763     }
 764   }
 765
 766   Parser.Lex();
 767   return false;
 768 }
 769
 770 /// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
 771 /// imm operand, to having "rm" or "mr" operands with the offset in the disp
 772 /// field.
 773 static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
 774                          bool isMR) {
 775   MCOperand Disp = Inst.getOperand(0);
 776
 777   // Start over with an empty instruction.
 778   Inst = MCInst();
 779   Inst.setOpcode(Opc);
 780
 781   if (!isMR)
 782     Inst.addOperand(MCOperand::CreateReg(RegNo));
 783
 784   // Add the mem operand.
 785   Inst.addOperand(MCOperand::CreateReg(0));  // Segment
 786   Inst.addOperand(MCOperand::CreateImm(1));  // Scale
 787   Inst.addOperand(MCOperand::CreateReg(0));  // IndexReg
 788   Inst.addOperand(Disp);                     // Displacement
 789   Inst.addOperand(MCOperand::CreateReg(0));  // BaseReg
 790
 791   if (isMR)
 792     Inst.addOperand(MCOperand::CreateReg(RegNo));
 793 }
 794
 795 // FIXME: Custom X86 cleanup function to implement a temporary hack to handle
 796 // matching INCL/DECL correctly for x86_64. This needs to be replaced by a
 797 // proper mechanism for supporting (ambiguous) feature dependent instructions.
 798 void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
 799   if (!Is64Bit) return;
 800
 801   switch (Inst.getOpcode()) {
 802   case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
 803   case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
 804   case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
 805   case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
 806   case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
 807   case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
 808   case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
 809   case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
 810
 811   // moffset instructions are x86-32 only.
 812   case X86::MOV8o8a:   LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
 813   case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
 814   case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
 815   case X86::MOV8ao8:   LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
 816   case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
 817   case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
 818   }
 819 }
 820
 821 bool
 822 X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
 823                                     &Operands,
 824                                   MCInst &Inst) {
 825   // First, try a direct match.
 826   if (!MatchInstructionImpl(Operands, Inst))
 827     return false;
 828
 829   // Ignore anything which is obviously not a suffix match.
 830   if (Operands.size() == 0)
 831     return true;
 832   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 833   if (!Op->isToken() || Op->getToken().size() > 15)
 834     return true;
 835
 836   // FIXME: Ideally, we would only attempt suffix matches for things which are
 837   // valid prefixes, and we could just infer the right unambiguous
 838   // type. However, that requires substantially more matcher support than the
 839   // following hack.
 840
 841   // Change the operand to point to a temporary token.
 842   char Tmp[16];
 843   StringRef Base = Op->getToken();
 844   memcpy(Tmp, Base.data(), Base.size());
 845   Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
 846
 847   // Check for the various suffix matches.
 848   Tmp[Base.size()] = 'b';
 849   bool MatchB = MatchInstructionImpl(Operands, Inst);
 850   Tmp[Base.size()] = 'w';
 851   bool MatchW = MatchInstructionImpl(Operands, Inst);
 852   Tmp[Base.size()] = 'l';
 853   bool MatchL = MatchInstructionImpl(Operands, Inst);
 854   Tmp[Base.size()] = 'q';
 855   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 856
 857   // Restore the old token.
 858   Op->setTokenValue(Base);
 859
 860   // If exactly one matched, then we treat that as a successful match (and the
 861   // instruction will already have been filled in correctly, since the failing
 862   // matches won't have modified it).
 863   if (MatchB + MatchW + MatchL + MatchQ == 3)
 864     return false;
 865
 866   // Otherwise, the match failed.
 867   return true;
 868 }
 869
 870
 871 extern "C" void LLVMInitializeX86AsmLexer();
 872
 873 // Force static initialization.
 874 extern "C" void LLVMInitializeX86AsmParser() {
 875   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 876   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 877   LLVMInitializeX86AsmLexer();
 878 }
 879
 880 #include "X86GenAsmMatcher.inc"