1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Target/TargetAsmParser.h"
12 #include "X86Subtarget.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCStreamer.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCParser/MCAsmLexer.h"
21 #include "llvm/MC/MCParser/MCAsmParser.h"
22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23 #include "llvm/Support/SourceMgr.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Target/TargetRegistry.h"
26 #include "llvm/Target/TargetAsmParser.h"
// AT&T-syntax X86 assembly parser, layered on the target-independent
// TargetAsmParser interface.
// NOTE(review): the declarations of Parser, TM, Is64Bit, Warning and TokError,
// which are used elsewhere in this file, are not visible in this excerpt.
32 class X86ATTAsmParser : public TargetAsmParser {
// Returns the generic assembly parser held in the Parser member.
40 MCAsmParser &getParser() const { return Parser; }
// Shorthand for the lexer owned by the generic parser.
42 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Forwards a diagnostic to the generic parser and returns its result, which
// lets callers write `return Error(...)`.
44 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
// Parses a register reference, filling in the register number and its source
// range. Callers in this file treat a true result as failure.
46 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
// Operand parsers; callers in this file treat a null result as failure.
48 X86Operand *ParseOperand();
49 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Handler for the `.word` directive (called from ParseDirective).
51 bool ParseDirectiveWord(unsigned Size, SMLoc L);
// Matches a parsed operand list against the instruction tables.
// NOTE(review): the remaining parameter(s) of this declaration are not
// visible in this excerpt.
53 bool MatchInstruction(SMLoc IDLoc,
54 const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
57 /// @name Auto-generated Matcher Functions
// Pulls the generated matcher declarations into the class body.
60 #define GET_ASSEMBLER_HEADER
61 #include "X86GenAsmMatcher.inc"
// Stores the generic parser and the target machine, then seeds the matcher's
// available-feature set from the X86 subtarget.
66 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
67 : TargetAsmParser(T), Parser(_Parser), TM(TM) {
69 // Initialize the set of available features.
70 setAvailableFeatures(ComputeAvailableFeatures(
71 &TM.getSubtarget<X86Subtarget>()));
// Parses one instruction statement into Operands; the mnemonic has already
// been lexed and is passed as Name/NameLoc.
74 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
75 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
// Handles target-specific assembler directives.
77 virtual bool ParseDirective(AsmToken DirectiveID);
// Parser variant registered for the 32-bit X86 target (see
// LLVMInitializeX86AsmParser). The constructor forwards all of its arguments
// to X86ATTAsmParser.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it selects 32-bit mode -- confirm.
80 class X86_32ATTAsmParser : public X86ATTAsmParser {
82 X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
83 : X86ATTAsmParser(T, _Parser, TM) {
// Parser variant registered for the 64-bit X86 target (see
// LLVMInitializeX86AsmParser). The constructor forwards all of its arguments
// to X86ATTAsmParser.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it selects 64-bit mode -- confirm.
88 class X86_64ATTAsmParser : public X86ATTAsmParser {
90 X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
91 : X86ATTAsmParser(T, _Parser, TM) {
96 } // end anonymous namespace
98 /// @name Auto-generated Match Functions
// Forward declaration only; no definition is visible in this excerpt.
// NOTE(review): presumably provided by the GET_REGISTER_MATCHER section of
// X86GenAsmMatcher.inc included at the end of the file -- confirm.
// ParseRegister compares the result against 0 to detect "no register matched".
101 static unsigned MatchRegisterName(StringRef Name);
107 /// X86Operand - Instances of this class represent a parsed X86 machine
// An operand is one of four kinds (Token, Register, Immediate, Memory); each
// accessor asserts that it is called on the matching kind.
// NOTE(review): the KindTy enum and the Tok/Reg/Imm/Mem storage members are
// not visible in this excerpt.
109 struct X86Operand : public MCParsedAsmOperand {
// Source range covered by this operand.
117 SMLoc StartLoc, EndLoc;
// Records the kind and source range; kind-specific payload is filled in by
// the Create* factory functions below.
142 X86Operand(KindTy K, SMLoc Start, SMLoc End)
143 : Kind(K), StartLoc(Start), EndLoc(End) {}
145 /// getStartLoc - Get the location of the first token of this operand.
146 SMLoc getStartLoc() const { return StartLoc; }
147 /// getEndLoc - Get the location of the last token of this operand.
148 SMLoc getEndLoc() const { return EndLoc; }
// Debug printing hook; intentionally a no-op here.
150 virtual void dump(raw_ostream &OS) const {}
// Returns the token text as a view over the stored pointer/length pair.
152 StringRef getToken() const {
153 assert(Kind == Token && "Invalid access!");
154 return StringRef(Tok.Data, Tok.Length);
// Re-points the token at Value's characters. Only the pointer and length are
// stored (no copy), so Value's storage must stay valid while the token is
// read.
156 void setTokenValue(StringRef Value) {
157 assert(Kind == Token && "Invalid access!");
158 Tok.Data = Value.data();
159 Tok.Length = Value.size();
// Kind-checked accessors. NOTE(review): their return statements are not
// visible in this excerpt.
162 unsigned getReg() const {
163 assert(Kind == Register && "Invalid access!");
167 const MCExpr *getImm() const {
168 assert(Kind == Immediate && "Invalid access!");
172 const MCExpr *getMemDisp() const {
173 assert(Kind == Memory && "Invalid access!");
176 unsigned getMemSegReg() const {
177 assert(Kind == Memory && "Invalid access!");
180 unsigned getMemBaseReg() const {
181 assert(Kind == Memory && "Invalid access!");
184 unsigned getMemIndexReg() const {
185 assert(Kind == Memory && "Invalid access!");
188 unsigned getMemScale() const {
189 assert(Kind == Memory && "Invalid access!");
// Kind predicates.
193 bool isToken() const {return Kind == Token; }
195 bool isImm() const { return Kind == Immediate; }
// Immediate-range predicates. For a constant expression, each accepts the
// small non-negative values plus the bit patterns of the corresponding
// negative values at the listed widths. The value is compared as an unsigned
// 64-bit quantity.
// NOTE(review): the early-out lines for non-immediates and for non-constant
// expressions are not visible in this excerpt.
//
// 16-bit operation, 8-bit sign-extended immediate: 0..0x7F, 0xFF80..0xFFFF,
// or the top 128 values of the 64-bit range.
197 bool isImmSExti16i8() const {
201 // If this isn't a constant expr, just assume it fits and let relaxation
203 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
207 // Otherwise, check the value is in a range that makes sense for this
209 uint64_t Value = CE->getValue();
210 return (( Value <= 0x000000000000007FULL)||
211 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
212 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// 32-bit operation, 8-bit sign-extended immediate: 0..0x7F,
// 0xFFFFFF80..0xFFFFFFFF, or the top 128 values of the 64-bit range.
214 bool isImmSExti32i8() const {
218 // If this isn't a constant expr, just assume it fits and let relaxation
220 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
224 // Otherwise, check the value is in a range that makes sense for this
226 uint64_t Value = CE->getValue();
227 return (( Value <= 0x000000000000007FULL)||
228 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
229 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// 64-bit operation, 8-bit sign-extended immediate: 0..0x7F or the top 128
// values of the 64-bit range.
231 bool isImmSExti64i8() const {
235 // If this isn't a constant expr, just assume it fits and let relaxation
237 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
241 // Otherwise, check the value is in a range that makes sense for this
243 uint64_t Value = CE->getValue();
244 return (( Value <= 0x000000000000007FULL)||
245 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// 64-bit operation, 32-bit sign-extended immediate: 0..0x7FFFFFFF or the top
// 2^31 values of the 64-bit range.
247 bool isImmSExti64i32() const {
251 // If this isn't a constant expr, just assume it fits and let relaxation
253 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
257 // Otherwise, check the value is in a range that makes sense for this
259 uint64_t Value = CE->getValue();
260 return (( Value <= 0x000000007FFFFFFFULL)||
261 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
264 bool isMem() const { return Kind == Memory; }
// An absolute memory operand is a bare displacement: no segment, base or
// index register, and a scale of 1.
266 bool isAbsMem() const {
267 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
268 !getMemIndexReg() && getMemScale() == 1;
271 bool isReg() const { return Kind == Register; }
// Appends Expr to Inst, as an immediate operand when it is a constant
// expression.
// NOTE(review): the `else` that presumably guards the expression-operand
// line is not visible in this excerpt.
273 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
274 // Add as immediates when possible.
275 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
276 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
278 Inst.addOperand(MCOperand::CreateExpr(Expr));
// Operand emitters: N is the operand count expected by the caller and is
// only checked by the asserts.
281 void addRegOperands(MCInst &Inst, unsigned N) const {
282 assert(N == 1 && "Invalid number of operands!");
283 Inst.addOperand(MCOperand::CreateReg(getReg()));
286 void addImmOperands(MCInst &Inst, unsigned N) const {
287 assert(N == 1 && "Invalid number of operands!");
288 addExpr(Inst, getImm());
// Emits the five memory sub-operands in this fixed order: base register,
// scale, index register, displacement, segment register.
291 void addMemOperands(MCInst &Inst, unsigned N) const {
292 assert((N == 5) && "Invalid number of operands!");
293 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
294 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
295 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
296 addExpr(Inst, getMemDisp());
297 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Emits only the displacement, always as an expression operand (unlike
// addExpr, constants are not folded to immediates here).
300 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
301 assert((N == 1) && "Invalid number of operands!");
302 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Factory functions. Each heap-allocates a new operand with `new`.
// NOTE(review): the return statements are not visible in this excerpt, and
// ownership of the returned pointer is not established here -- confirm who
// deletes it.
//
// Token operand: start and end locations are both Loc. The string is not
// copied; only Str's data pointer and size are stored.
305 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
306 X86Operand *Res = new X86Operand(Token, Loc, Loc);
307 Res->Tok.Data = Str.data();
308 Res->Tok.Length = Str.size();
312 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
313 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
314 Res->Reg.RegNo = RegNo;
318 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
319 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
324 /// Create an absolute memory operand.
// NOTE(review): the assignments of SegReg and Scale for this overload are
// not visible in this excerpt; isAbsMem() requires SegReg == 0 and
// Scale == 1 -- confirm they are set accordingly.
325 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
327 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
329 Res->Mem.Disp = Disp;
330 Res->Mem.BaseReg = 0;
331 Res->Mem.IndexReg = 0;
336 /// Create a generalized memory operand.
337 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
338 unsigned BaseReg, unsigned IndexReg,
339 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
340 // We should never just have a displacement, that should be parsed as an
341 // absolute memory operand.
342 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
344 // The scale should always be one of {1,2,4,8}.
345 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
347 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
348 Res->Mem.SegReg = SegReg;
349 Res->Mem.Disp = Disp;
350 Res->Mem.BaseReg = BaseReg;
351 Res->Mem.IndexReg = IndexReg;
352 Res->Mem.Scale = Scale;
357 } // end anonymous namespace.
// Parses "%<name>" into a register number. The current token must be '%'.
// On entry nothing has been consumed; StartLoc is set to the '%' location and
// EndLoc to the location of the last token of the register reference.
// Error paths return the result of Error(...); callers in this file treat a
// true result as failure.
// Besides names known to MatchRegisterName, this accepts "%st", "%st(N)" for
// N in 0..7, and "%db0".."%db7" as aliases for DR0..DR7.
// NOTE(review): the success-path return statements are not visible in this
// excerpt.
360 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
361 SMLoc &StartLoc, SMLoc &EndLoc) {
363 const AsmToken &TokPercent = Parser.getTok();
364 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
365 StartLoc = TokPercent.getLoc();
366 Parser.Lex(); // Eat percent token.
368 const AsmToken &Tok = Parser.getTok();
369 if (Tok.isNot(AsmToken::Identifier))
370 return Error(Tok.getLoc(), "invalid register name");
372 // FIXME: Validate register for the current architecture; we have to do
373 // validation later, so maybe there is no need for this here.
374 RegNo = MatchRegisterName(Tok.getString());
376 // FIXME: This should be done using Requires<In32BitMode> and
377 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
378 // can be also checked.
// RIZ is rejected unless the parser is in 64-bit mode.
379 if (RegNo == X86::RIZ && !Is64Bit)
380 return Error(Tok.getLoc(), "riz register in 64-bit mode only");
382 // Parse %st(1) and "%st" as "%st(0)"
// Reached only when the generic lookup above returned 0 for "st".
383 if (RegNo == 0 && Tok.getString() == "st") {
385 EndLoc = Tok.getLoc();
386 Parser.Lex(); // Eat 'st'
388 // Check to see if we have '(4)' after %st.
// NOTE(review): the statements taken when no '(' follows are not visible in
// this excerpt.
389 if (getLexer().isNot(AsmToken::LParen))
394 const AsmToken &IntTok = Parser.getTok();
395 if (IntTok.isNot(AsmToken::Integer))
396 return Error(IntTok.getLoc(), "expected stack index");
// Map the literal stack index onto the corresponding ST register.
397 switch (IntTok.getIntVal()) {
398 case 0: RegNo = X86::ST0; break;
399 case 1: RegNo = X86::ST1; break;
400 case 2: RegNo = X86::ST2; break;
401 case 3: RegNo = X86::ST3; break;
402 case 4: RegNo = X86::ST4; break;
403 case 5: RegNo = X86::ST5; break;
404 case 6: RegNo = X86::ST6; break;
405 case 7: RegNo = X86::ST7; break;
406 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() consumes the integer; its result (the next token) must be ')'.
409 if (getParser().Lex().isNot(AsmToken::RParen))
410 return Error(Parser.getTok().getLoc(), "expected ')'");
// NOTE(review): Tok is a reference obtained from getTok() before the Lex()
// calls above; which token it denotes at this point depends on getTok()'s
// semantics -- confirm EndLoc ends up at the ')'.
412 EndLoc = Tok.getLoc();
413 Parser.Lex(); // Eat ')'
417 // If this is "db[0-7]", match it as an alias
// Only three-character names starting with "db" are considered; the third
// character selects the debug register.
419 if (RegNo == 0 && Tok.getString().size() == 3 &&
420 Tok.getString().startswith("db")) {
421 switch (Tok.getString()[2]) {
422 case '0': RegNo = X86::DR0; break;
423 case '1': RegNo = X86::DR1; break;
424 case '2': RegNo = X86::DR2; break;
425 case '3': RegNo = X86::DR3; break;
426 case '4': RegNo = X86::DR4; break;
427 case '5': RegNo = X86::DR5; break;
428 case '6': RegNo = X86::DR6; break;
429 case '7': RegNo = X86::DR7; break;
433 EndLoc = Tok.getLoc();
434 Parser.Lex(); // Eat it.
// NOTE(review): the condition guarding this diagnostic is not visible in
// this excerpt; presumably it fires when RegNo is still 0.
440 return Error(Tok.getLoc(), "invalid register name");
// Ordinary register: record its location and consume the identifier.
442 EndLoc = Tok.getLoc();
443 Parser.Lex(); // Eat identifier token.
// Parses a single operand, dispatching on the kind of the current token:
//   '%'  -> register, or segment-prefixed memory operand if ':' follows;
//   '$'  -> immediate expression;
//   the memory-operand case handles operands with no segment register.
// Returns null on failure, as the visible `return 0` shows.
// NOTE(review): the case label for the memory-operand branch and several
// local declarations/returns are not visible in this excerpt.
447 X86Operand *X86ATTAsmParser::ParseOperand() {
448 switch (getLexer().getKind()) {
450 // Parse a memory operand with no segment register.
451 return ParseMemOperand(0, Parser.getTok().getLoc());
452 case AsmToken::Percent: {
453 // Read the register.
456 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo index registers are not valid as standalone operands.
457 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
458 Error(Start, "eiz and riz can only be used as index registers");
462 // If this is a segment register followed by a ':', then this is the start
463 // of a memory reference, otherwise this is a normal register reference.
464 if (getLexer().isNot(AsmToken::Colon))
465 return X86Operand::CreateReg(RegNo, Start, End);
// The register just parsed becomes the segment of the memory operand.
468 getParser().Lex(); // Eat the colon.
469 return ParseMemOperand(RegNo, Start);
471 case AsmToken::Dollar: {
// Start is the location of the '$' token itself.
473 SMLoc Start = Parser.getTok().getLoc(), End;
476 if (getParser().ParseExpression(Val, End))
478 return X86Operand::CreateImm(Val, Start, End);
483 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
484 /// has already been parsed if present.
// SegReg is the already-parsed segment register (0 if none) and MemStart the
// location where the operand began. Returns null on failure, as the visible
// `return 0` statements show.
// NOTE(review): several `return 0;` lines, local declarations and closing
// braces are not visible in this excerpt.
485 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
487 // We have to disambiguate a parenthesized expression "(4+5)" from the start
488 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
489 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 when none is written.
491 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
492 if (getLexer().isNot(AsmToken::LParen)) {
494 if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
496 // After parsing the base expression we could either have a parenthesized
497 // memory address or not. If not, return now. If so, eat the (.
498 if (getLexer().isNot(AsmToken::LParen)) {
499 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): the condition choosing between the two returns below is not
// visible in this excerpt; presumably it tests SegReg == 0.
501 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
502 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
508 // Okay, we have a '('. We don't know if this is an expression or not,
509 // so we have to eat the ( to see beyond it.
510 SMLoc LParenLoc = Parser.getTok().getLoc();
511 Parser.Lex(); // Eat the '('.
513 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
514 // Nothing to do here, fall into the code below with the '(' part of the
515 // memory operand consumed.
519 // It must be a parenthesized expression, parse it now.
520 if (getParser().ParseParenExpression(Disp, ExprEnd))
523 // After parsing the base expression we could either have a parenthesized
524 // memory address or not. If not, return now. If so, eat the (.
525 if (getLexer().isNot(AsmToken::LParen)) {
526 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): this absolute-operand return starts at LParenLoc, whereas
// the equivalent return earlier in the function starts at MemStart --
// confirm the difference is intended.
528 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
529 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
537 // If we reached here, then we just ate the ( of the memory operand. Process
538 // the rest of the memory operand.
539 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
541 if (getLexer().is(AsmToken::Percent)) {
// The same variable L is passed for both the start and end locations.
543 if (ParseRegister(BaseReg, L, L)) return 0;
544 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
545 Error(L, "eiz and riz can only be used as index registers");
550 if (getLexer().is(AsmToken::Comma)) {
551 Parser.Lex(); // Eat the comma.
553 // Following the comma we should have either an index register, or a scale
554 // value. We don't support the latter form, but we want to parse it
557 // Note that even though it would be completely consistent to support syntax
558 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
559 if (getLexer().is(AsmToken::Percent)) {
561 if (ParseRegister(IndexReg, L, L)) return 0;
563 if (getLexer().isNot(AsmToken::RParen)) {
564 // Parse the scale amount:
565 // ::= ',' [scale-expression]
566 if (getLexer().isNot(AsmToken::Comma)) {
567 Error(Parser.getTok().getLoc(),
568 "expected comma in scale expression");
571 Parser.Lex(); // Eat the comma.
// An empty scale ("(%eax,%ebx,)") leaves Scale at its default of 1.
573 if (getLexer().isNot(AsmToken::RParen)) {
574 SMLoc Loc = Parser.getTok().getLoc();
577 if (getParser().ParseAbsoluteExpression(ScaleVal))
580 // Validate the scale amount.
581 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
582 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
585 Scale = (unsigned)ScaleVal;
588 } else if (getLexer().isNot(AsmToken::RParen)) {
589 // A scale amount without an index is ignored.
// The expression is still parsed so the tokens are consumed, but only a
// warning is issued and the value is not stored in Scale.
591 SMLoc Loc = Parser.getTok().getLoc();
594 if (getParser().ParseAbsoluteExpression(Value))
598 Warning(Loc, "scale factor without index register is ignored");
603 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
604 if (getLexer().isNot(AsmToken::RParen)) {
605 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
608 SMLoc MemEnd = Parser.getTok().getLoc();
609 Parser.Lex(); // Eat the ')'.
611 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parses one instruction statement whose mnemonic (Name at NameLoc) has
// already been lexed, and appends the resulting operands to Operands.
// The work happens in four stages:
//   1. rewrite mnemonic aliases into the spelling the match tables use;
//   2. fold cmp<cc>{ss,sd,ps,pd} and vpclmul<sel>dq pseudo-mnemonics into a
//      base mnemonic plus an extra immediate operand;
//   3. parse the comma-separated operand list;
//   4. apply per-mnemonic fix-ups to the operand list.
// NOTE(review): many lines of this function (returns, deletes, some table
// entries and closing braces) are not visible in this excerpt.
615 bool X86ATTAsmParser::
616 ParseInstruction(StringRef Name, SMLoc NameLoc,
617 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
618 // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
619 // represent alternative syntaxes in the .td file, without requiring
620 // instruction duplication.
621 StringRef PatchedName = StringSwitch<StringRef>(Name)
623 .Case("salb", "shlb")
624 .Case("sall", "shll")
625 .Case("salq", "shlq")
626 .Case("salw", "shlw")
629 .Case("repnz", "repne")
630 .Case("iret", "iretl")
631 .Case("sysret", "sysretl")
// Unsuffixed stack operations take the pointer-sized form of the current mode.
632 .Case("push", Is64Bit ? "pushq" : "pushl")
633 .Case("pop", Is64Bit ? "popq" : "popl")
634 .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
635 .Case("popf", Is64Bit ? "popfq" : "popfl")
636 .Case("pushfd", "pushfl")
637 .Case("popfd", "popfl")
// The return spelling native to the current mode maps to plain "ret"; the
// other spelling is left unchanged.
638 .Case("retl", Is64Bit ? "retl" : "ret")
639 .Case("retq", Is64Bit ? "ret" : "retq")
640 .Case("setz", "sete") .Case("setnz", "setne")
641 .Case("setc", "setb") .Case("setna", "setbe")
642 .Case("setnae", "setb").Case("setnb", "setae")
643 .Case("setnbe", "seta").Case("setnc", "setae")
644 .Case("setng", "setle").Case("setnge", "setl")
645 .Case("setnl", "setge").Case("setnle", "setg")
646 .Case("setpe", "setp") .Case("setpo", "setnp")
647 .Case("jz", "je") .Case("jnz", "jne")
648 .Case("jc", "jb") .Case("jna", "jbe")
649 .Case("jnae", "jb").Case("jnb", "jae")
650 .Case("jnbe", "ja").Case("jnc", "jae")
651 .Case("jng", "jle").Case("jnge", "jl")
652 .Case("jnl", "jge").Case("jnle", "jg")
653 .Case("jpe", "jp") .Case("jpo", "jnp")
654 // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
655 .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl")
656 .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb")
657 .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
658 .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe")
659 .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
660 .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae")
661 .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
662 .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
663 .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
664 .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae")
665 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
666 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle")
// NOTE(review): the next two rows use a "cmovn" stem, which has no
// counterpart in the setcc/jcc alias lists above. The key "cmovnl" here
// (-> "cmovgel") is repeated further down (-> "cmovge"); which entry takes
// effect depends on StringSwitch's duplicate-key behaviour -- confirm and
// remove the unintended one.
667 .Case("cmovnw", "cmovgew").Case("cmovnl", "cmovgel")
668 .Case("cmovnq", "cmovgeq").Case("cmovn", "cmovge")
// NOTE(review): these two rows repeat the cmovng* rows a few lines above
// verbatim.
669 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
670 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle")
671 .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
672 .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
673 .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
674 .Case("cmovnlq", "cmovgeq").Case("cmovnl", "cmovge")
675 .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
676 .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
677 .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
678 .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne")
679 .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel")
680 .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove")
681 .Case("fwait", "wait")
682 .Case("movzx", "movzb")
685 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The text between the "cmp"/"vcmp" prefix and the two-character type suffix
// is looked up as a comparison predicate; on a hit the predicate becomes an
// extra immediate operand and the mnemonic collapses to the base form.
686 const MCExpr *ExtraImmOp = 0;
687 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
688 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
689 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
690 bool IsVCMP = PatchedName.startswith("vcmp");
// Index of the first predicate character: past "vcmp" (4) or "cmp" (3).
691 unsigned SSECCIdx = IsVCMP ? 4 : 3;
692 unsigned SSEComparisonCode = StringSwitch<unsigned>(
693 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
706 .Case("neq_oq", 0x0C)
713 .Case("unord_s", 0x13)
714 .Case("neq_us", 0x14)
715 .Case("nlt_uq", 0x15)
716 .Case("nle_uq", 0x16)
719 .Case("nge_uq", 0x19)
720 .Case("ngt_uq", 0x1A)
721 .Case("false_os", 0x1B)
722 .Case("neq_os", 0x1C)
725 .Case("true_us", 0x1F)
// ~0U marks "no predicate recognised"; the mnemonic is then left untouched.
727 if (SSEComparisonCode != ~0U) {
728 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
729 getParser().getContext());
730 if (PatchedName.endswith("ss")) {
731 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
732 } else if (PatchedName.endswith("sd")) {
733 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
734 } else if (PatchedName.endswith("ps")) {
735 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
737 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
738 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
743 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
// Same scheme as above: the selector between "vpclmul" (7 characters) and
// the trailing two characters becomes an immediate operand.
744 if (PatchedName.startswith("vpclmul")) {
745 unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
746 PatchedName.slice(7, PatchedName.size() - 2))
747 .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
748 .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
749 .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
750 .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
752 if (CLMULQuadWordSelect != ~0U) {
753 ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
754 getParser().getContext());
755 assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
756 PatchedName = "vpclmulqdq";
// Operand 0 is always the (possibly rewritten) mnemonic token.
760 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// NOTE(review): the guard on ExtraImmOp for the next line is not visible in
// this excerpt; presumably it is pushed only when non-null.
763 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
766 // Determine whether this is an instruction prefix.
768 PatchedName == "lock" || PatchedName == "rep" ||
769 PatchedName == "repne";
772 // This does the actual operand parsing. Don't parse any more if we have a
773 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
774 // just want to parse the "lock" as the first instruction and the "incl" as
776 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
778 // Parse '*' modifier.
779 if (getLexer().is(AsmToken::Star)) {
780 SMLoc Loc = Parser.getTok().getLoc();
781 Operands.push_back(X86Operand::CreateToken("*", Loc));
782 Parser.Lex(); // Eat the star.
785 // Read the first operand.
// A null operand means ParseOperand failed; the rest of the statement is
// then discarded.
786 if (X86Operand *Op = ParseOperand())
787 Operands.push_back(Op);
789 Parser.EatToEndOfStatement();
793 while (getLexer().is(AsmToken::Comma)) {
794 Parser.Lex(); // Eat the comma.
796 // Parse and remember the operand.
797 if (X86Operand *Op = ParseOperand())
798 Operands.push_back(Op);
800 Parser.EatToEndOfStatement();
// Anything left over after the last operand is an error.
805 if (getLexer().isNot(AsmToken::EndOfStatement)) {
806 Parser.EatToEndOfStatement();
807 return TokError("unexpected token in argument list");
811 if (getLexer().is(AsmToken::EndOfStatement))
812 Parser.Lex(); // Consume the EndOfStatement
// NOTE(review): every fix-up below tests the original Name, not PatchedName,
// so an alias rewritten above (e.g. "sall" -> "shll") does not trigger the
// "shl" fix-up -- confirm that this is intended.
814 // FIXME: Hack to recognize s{hr,ar,hl} <op>, $1. Canonicalize to
// Operand layout here is [mnemonic, $imm, dest]; a constant shift count of 1
// is dropped from the list.
816 if ((Name.startswith("shr") || Name.startswith("sar") ||
817 Name.startswith("shl")) &&
818 Operands.size() == 3) {
819 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
820 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
821 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
823 Operands.erase(Operands.begin() + 1);
827 // FIXME: Hack to recognize "in[bwl] <op>". Canonicalize it to
// The third character of the mnemonic selects the implicit accumulator
// register (al / ax / eax), which is appended as a trailing operand.
829 if ((Name == "inb" || Name == "inw" || Name == "inl") &&
830 Operands.size() == 2) {
833 Reg = MatchRegisterName("al");
834 else if (Name[2] == 'w')
835 Reg = MatchRegisterName("ax");
837 Reg = MatchRegisterName("eax");
838 SMLoc Loc = Operands.back()->getEndLoc();
839 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
842 // FIXME: Hack to recognize "out[bwl] <op>". Canonicalize it to
// As for "in", but the accumulator register must come first, hence the swap
// after appending it.
844 if ((Name == "outb" || Name == "outw" || Name == "outl") &&
845 Operands.size() == 2) {
848 Reg = MatchRegisterName("al");
849 else if (Name[3] == 'w')
850 Reg = MatchRegisterName("ax");
852 Reg = MatchRegisterName("eax");
853 SMLoc Loc = Operands.back()->getEndLoc();
854 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
855 std::swap(Operands[1], Operands[2]);
858 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
859 // "f{mul*,add*,sub*,div*} $op"
// A trailing explicit ST0 operand is removed.
860 if ((Name.startswith("fmul") || Name.startswith("fadd") ||
861 Name.startswith("fsub") || Name.startswith("fdiv")) &&
862 Operands.size() == 3 &&
863 static_cast<X86Operand*>(Operands[2])->isReg() &&
864 static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
866 Operands.erase(Operands.begin() + 2);
869 // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
// The register operand is duplicated so the two-operand form matches the
// three-operand table entry.
871 if (Name.startswith("imul") && Operands.size() == 3 &&
872 static_cast<X86Operand*>(Operands[1])->isImm() &&
873 static_cast<X86Operand*>(Operands.back())->isReg()) {
874 X86Operand *Op = static_cast<X86Operand*>(Operands.back());
875 Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
879 // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
880 // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
881 // errors, since its encoding is the most compact.
882 if (Name == "sldt" && Operands.size() == 2 &&
883 static_cast<X86Operand*>(Operands[1])->isMem()) {
// NOTE(review): the previous Operands[0] token is overwritten here; the line
// that presumably frees it is not visible in this excerpt.
885 Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
888 // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
889 // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the
890 // other operand order, swap them.
891 if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
893 if (Operands.size() == 3 &&
894 static_cast<X86Operand*>(Operands[1])->isMem() &&
895 static_cast<X86Operand*>(Operands[2])->isReg()) {
896 std::swap(Operands[1], Operands[2]);
899 // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
900 // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the
901 // other operand order, swap them.
902 if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
904 if (Operands.size() == 3 &&
905 static_cast<X86Operand*>(Operands[1])->isReg() &&
906 static_cast<X86Operand*>(Operands[2])->isMem()) {
907 std::swap(Operands[1], Operands[2]);
// Dispatches target-specific directives by name. Only ".word" is handled in
// the visible code, and it is parsed with a Size argument of 2.
// NOTE(review): the fall-through return for unrecognised directives is not
// visible in this excerpt.
913 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
914 StringRef IDVal = DirectiveID.getIdentifier();
915 if (IDVal == ".word")
916 return ParseDirectiveWord(2, DirectiveID.getLoc());
920 /// ParseDirectiveWord
921 /// ::= .word [ expression (, expression)* ]
// Emits each listed expression through the streamer with the given Size
// (presumably a byte count -- confirm against MCStreamer::EmitValue).
// L is the directive's location and is used for the diagnostic below.
// NOTE(review): the loop construct, the local Value declaration and the
// return statements are not visible in this excerpt.
922 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list is accepted: nothing is emitted in that case.
923 if (getLexer().isNot(AsmToken::EndOfStatement)) {
926 if (getParser().ParseExpression(Value))
929 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
931 if (getLexer().is(AsmToken::EndOfStatement))
934 // FIXME: Improve diagnostic.
// Between expressions only a comma is allowed.
935 if (getLexer().isNot(AsmToken::Comma))
936 return Error(L, "unexpected token in directive");
// Matches the parsed operand list against the generated instruction tables.
// It first tries the mnemonic exactly as written. If that fails, it retries
// with each of the size suffixes 'b', 'w', 'l' and 'q' appended, accepts the
// result when exactly one suffix matches, and otherwise reports the most
// specific diagnostic it can at IDLoc.
// NOTE(review): the return type, the remaining parameter(s), the return
// statements and several declarations (Tmp, MatchChars) are not visible in
// this excerpt.
947 X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
948 const SmallVectorImpl<MCParsedAsmOperand*>
951 assert(!Operands.empty() && "Unexpect empty operand list!");
// Remembers whether the exact-mnemonic attempt failed on an operand, so that
// a more precise diagnostic can be produced further down.
953 bool WasOriginallyInvalidOperand = false;
// NOTE(review): not initialised here; presumably written by
// MatchInstructionImpl through a reference parameter -- confirm it is set on
// every path that later reads it.
954 unsigned OrigErrorInfo;
956 // First, try a direct match.
957 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
960 case Match_MissingFeature:
961 Error(IDLoc, "instruction requires a CPU feature not currently enabled");
963 case Match_InvalidOperand:
964 WasOriginallyInvalidOperand = true;
966 case Match_MnemonicFail:
970 // FIXME: Ideally, we would only attempt suffix matches for things which are
971 // valid prefixes, and we could just infer the right unambiguous
972 // type. However, that requires substantially more matcher support than the
975 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
976 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
978 // Change the operand to point to a temporary token.
979 StringRef Base = Op->getToken();
// setTokenValue stores only a pointer and length, so the writes to Tmp below
// change the mnemonic that each match attempt sees.
983 Op->setTokenValue(Tmp.str());
985 // Check for the various suffix matches.
// The character at index Base.size() is the suffix slot; it is overwritten
// in place before each attempt.
986 Tmp[Base.size()] = 'b';
987 unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
988 MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
989 Tmp[Base.size()] = 'w';
990 MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
991 Tmp[Base.size()] = 'l';
992 MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
993 Tmp[Base.size()] = 'q';
994 MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
996 // Restore the old token.
997 Op->setTokenValue(Base);
999 // If exactly one matched, then we treat that as a successful match (and the
1000 // instruction will already have been filled in correctly, since the failing
1001 // matches won't have modified it).
// Each comparison yields 0 or 1, so the sum counts the successful suffixes.
1002 unsigned NumSuccessfulMatches =
1003 (MatchB == Match_Success) + (MatchW == Match_Success) +
1004 (MatchL == Match_Success) + (MatchQ == Match_Success);
1005 if (NumSuccessfulMatches == 1)
1008 // Otherwise, the match failed, try to produce a decent error message.
1010 // If we had multiple suffix matches, then identify this as an ambiguous
1012 if (NumSuccessfulMatches > 1) {
// Collect the suffix letters that matched, in b/w/l/q order.
1014 unsigned NumMatches = 0;
1015 if (MatchB == Match_Success)
1016 MatchChars[NumMatches++] = 'b';
1017 if (MatchW == Match_Success)
1018 MatchChars[NumMatches++] = 'w';
1019 if (MatchL == Match_Success)
1020 MatchChars[NumMatches++] = 'l';
1021 if (MatchQ == Match_Success)
1022 MatchChars[NumMatches++] = 'q';
// Build a message listing every candidate as '<base><suffix>'.
// NOTE(review): the separator text emitted between candidates is not visible
// in this excerpt.
1024 SmallString<126> Msg;
1025 raw_svector_ostream OS(Msg);
1026 OS << "ambiguous instructions require an explicit suffix (could be ";
1027 for (unsigned i = 0; i != NumMatches; ++i) {
1030 if (i + 1 == NumMatches)
1032 OS << "'" << Base << MatchChars[i] << "'";
1035 Error(IDLoc, OS.str());
1039 // Okay, we know that none of the variants matched successfully.
1041 // If all of the instructions reported an invalid mnemonic, then the original
1042 // mnemonic was invalid.
1043 if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
1044 (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
// If the unsuffixed attempt got as far as operand checking, the mnemonic
// itself exists and the operand diagnostic below is used instead.
1045 if (!WasOriginallyInvalidOperand) {
1046 Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
1050 // Recover location info for the operand if we know which was the problem.
// ~0U means that no operand index was reported.
1051 SMLoc ErrorLoc = IDLoc;
1052 if (OrigErrorInfo != ~0U) {
1053 ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
// Fall back to the mnemonic location if the operand has no location.
1054 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
1057 Error(ErrorLoc, "invalid operand for instruction");
1061 // If one instruction matched with a missing feature, report this as a
1063 if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
1064 (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
1065 Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1069 // If one instruction matched with an invalid operand, report this as an
1071 if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
1072 (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
1073 Error(IDLoc, "invalid operand for instruction");
1077 // If all of these were an outright failure, report it in a useless way.
1078 // FIXME: We should give nicer diagnostics about the exact failure.
1079 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
// Declared here so the parser initializer below can call it; no definition
// is visible in this excerpt.
1084 extern "C" void LLVMInitializeX86AsmLexer();
1086 // Force static initialization.
// C-linkage entry point: registers the 32-bit and 64-bit AT&T parser classes
// with their respective targets, then initializes the X86 assembly lexer.
1087 extern "C" void LLVMInitializeX86AsmParser() {
1088 RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1089 RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1090 LLVMInitializeX86AsmLexer();
1093 #define GET_REGISTER_MATCHER
1094 #define GET_MATCHER_IMPLEMENTATION
1095 #include "X86GenAsmMatcher.inc"