1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Target/TargetAsmParser.h"
12 #include "X86Subtarget.h"
13 #include "llvm/Target/TargetRegistry.h"
14 #include "llvm/Target/TargetAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCParser/MCAsmLexer.h"
19 #include "llvm/MC/MCParser/MCAsmParser.h"
20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "llvm/Support/raw_ostream.h"
/// X86ATTAsmParser - TargetAsmParser implementation for X86 assembly. It wraps
/// the generic MCAsmParser handed to its constructor and declares the operand,
/// instruction and directive parsing hooks that are defined later in this file.
// NOTE(review): the embedded line numbers of this listing skip values, so some
// member declarations and access specifiers of this class are not visible here.
33 class X86ATTAsmParser : public TargetAsmParser {
/// getParser - The generic assembly parser this target parser is bound to.
41 MCAsmParser &getParser() const { return Parser; }
/// getLexer - The lexer owned by the generic parser.
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Error - Forward a diagnostic for location L to the generic parser and
/// return its result, so callers can write "return Error(...)".
45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
/// ParseRegister - Parse a '%'-prefixed register; results are returned through
/// the three reference parameters.
47 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
/// ParseOperand / ParseMemOperand - Parse one operand; both return a newly
/// created X86Operand, and callers in this file treat a null result as failure.
49 X86Operand *ParseOperand();
50 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
/// ParseDirectiveWord - Handle a data directive whose values are Size bytes
/// wide (".word" passes 2).
52 bool ParseDirectiveWord(unsigned Size, SMLoc L);
/// MatchAndEmitInstruction - Match the parsed operand list against the
/// instruction tables and emit the result.
// NOTE(review): the final parameter line of this declaration is missing from
// the listing.
54 bool MatchAndEmitInstruction(SMLoc IDLoc,
55 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
58 /// @name Auto-generated Matcher Functions
// GET_ASSEMBLER_HEADER selects the declaration section of the generated
// matcher include so that it is pulled into this class body.
61 #define GET_ASSEMBLER_HEADER
62 #include "X86GenAsmMatcher.inc"
/// Bind the parser to its generic MCAsmParser and TargetMachine, then derive
/// the available-feature set from the X86 subtarget.
67 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
68 : TargetAsmParser(T), Parser(_Parser), TM(TM) {
70 // Initialize the set of available features.
71 setAvailableFeatures(ComputeAvailableFeatures(
72 &TM.getSubtarget<X86Subtarget>()));
/// ParseInstruction - Parse the operands of the instruction named Name and
/// append them (after the mnemonic token) to Operands.
75 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
76 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
/// ParseDirective - Handle a target-specific directive.
78 virtual bool ParseDirective(AsmToken DirectiveID);
/// X86_32ATTAsmParser - X86ATTAsmParser subclass whose constructor forwards
/// its arguments unchanged to the base class.
// NOTE(review): the constructor body is not visible in this listing;
// presumably it selects 32-bit mode (Is64Bit) - confirm against the full file.
81 class X86_32ATTAsmParser : public X86ATTAsmParser {
83 X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
84 : X86ATTAsmParser(T, _Parser, TM) {
/// X86_64ATTAsmParser - X86ATTAsmParser subclass whose constructor forwards
/// its arguments unchanged to the base class.
// NOTE(review): the constructor body is not visible in this listing;
// presumably it selects 64-bit mode (Is64Bit) - confirm against the full file.
89 class X86_64ATTAsmParser : public X86ATTAsmParser {
91 X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
92 : X86ATTAsmParser(T, _Parser, TM) {
97 } // end anonymous namespace
99 /// @name Auto-generated Match Functions
/// MatchRegisterName - Map a register name (without the '%' prefix) to its
/// register number. Callers in this file treat a result of 0 as "no match".
// NOTE(review): only the declaration is visible here; the definition is
// presumably supplied by the generated matcher include - confirm.
102 static unsigned MatchRegisterName(StringRef Name);
108 /// X86Operand - Instances of this class represent a parsed X86 machine
/// instruction operand. The Kind tag selects one of four shapes used below:
/// Token, Register, Immediate or Memory.
// NOTE(review): the embedded line numbers of this listing skip values, so the
// Kind enum, the payload union and most closing braces / return statements of
// this struct are not visible here.
110 struct X86Operand : public MCParsedAsmOperand {
// Source range covered by this operand.
118 SMLoc StartLoc, EndLoc;
/// Construct an operand of kind K spanning [Start, End]; the kind-specific
/// payload is filled in afterwards by the Create* factories.
143 X86Operand(KindTy K, SMLoc Start, SMLoc End)
144 : Kind(K), StartLoc(Start), EndLoc(End) {}
146 /// getStartLoc - Get the location of the first token of this operand.
147 SMLoc getStartLoc() const { return StartLoc; }
148 /// getEndLoc - Get the location of the last token of this operand.
149 SMLoc getEndLoc() const { return EndLoc; }
/// dump - Debug printing hook; this implementation prints nothing.
151 virtual void dump(raw_ostream &OS) const {}
/// getToken - Text of a Token operand (asserts on any other kind).
153 StringRef getToken() const {
154 assert(Kind == Token && "Invalid access!");
155 return StringRef(Tok.Data, Tok.Length);
/// setTokenValue - Re-point a Token operand at Value. Only the pointer and
/// length are stored, so the referenced characters are not copied.
157 void setTokenValue(StringRef Value) {
158 assert(Kind == Token && "Invalid access!");
159 Tok.Data = Value.data();
160 Tok.Length = Value.size();
// Kind-checked accessors: each asserts that the operand has the matching
// kind before its payload is read.
163 unsigned getReg() const {
164 assert(Kind == Register && "Invalid access!");
168 const MCExpr *getImm() const {
169 assert(Kind == Immediate && "Invalid access!");
173 const MCExpr *getMemDisp() const {
174 assert(Kind == Memory && "Invalid access!");
177 unsigned getMemSegReg() const {
178 assert(Kind == Memory && "Invalid access!");
181 unsigned getMemBaseReg() const {
182 assert(Kind == Memory && "Invalid access!");
185 unsigned getMemIndexReg() const {
186 assert(Kind == Memory && "Invalid access!");
189 unsigned getMemScale() const {
190 assert(Kind == Memory && "Invalid access!");
194 bool isToken() const {return Kind == Token; }
196 bool isImm() const { return Kind == Immediate; }
/// isImmSExti16i8 - For a constant immediate, accept values that are an 8-bit
/// quantity sign-extended to 16 or to 64 bits: 0..0x7F, 0xFF80..0xFFFF, or
/// 0xFFFFFFFFFFFFFF80..0xFFFFFFFFFFFFFFFF.
198 bool isImmSExti16i8() const {
202 // If this isn't a constant expr, just assume it fits and leave it to relaxation.
204 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
208 // Otherwise, check that the value is in a range that makes sense for this extension.
210 uint64_t Value = CE->getValue();
211 return (( Value <= 0x000000000000007FULL)||
212 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
213 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
/// isImmSExti32i8 - As above, for an 8-bit quantity sign-extended to 32 or to
/// 64 bits: 0..0x7F, 0xFFFFFF80..0xFFFFFFFF, or
/// 0xFFFFFFFFFFFFFF80..0xFFFFFFFFFFFFFFFF.
215 bool isImmSExti32i8() const {
219 // If this isn't a constant expr, just assume it fits and leave it to relaxation.
221 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
225 // Otherwise, check that the value is in a range that makes sense for this extension.
227 uint64_t Value = CE->getValue();
228 return (( Value <= 0x000000000000007FULL)||
229 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
230 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
/// isImmSExti64i8 - Accept a constant that is an 8-bit quantity sign-extended
/// to 64 bits: 0..0x7F or 0xFFFFFFFFFFFFFF80..0xFFFFFFFFFFFFFFFF.
232 bool isImmSExti64i8() const {
236 // If this isn't a constant expr, just assume it fits and leave it to relaxation.
238 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
242 // Otherwise, check that the value is in a range that makes sense for this extension.
244 uint64_t Value = CE->getValue();
245 return (( Value <= 0x000000000000007FULL)||
246 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
/// isImmSExti64i32 - Accept a constant that is a 32-bit quantity sign-extended
/// to 64 bits: 0..0x7FFFFFFF or 0xFFFFFFFF80000000..0xFFFFFFFFFFFFFFFF.
248 bool isImmSExti64i32() const {
252 // If this isn't a constant expr, just assume it fits and leave it to relaxation.
254 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
258 // Otherwise, check that the value is in a range that makes sense for this extension.
260 uint64_t Value = CE->getValue();
261 return (( Value <= 0x000000007FFFFFFFULL)||
262 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
265 bool isMem() const { return Kind == Memory; }
/// isAbsMem - A memory operand that is only a displacement: no segment, base
/// or index register, and a scale of 1.
267 bool isAbsMem() const {
268 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
269 !getMemIndexReg() && getMemScale() == 1;
272 bool isReg() const { return Kind == Register; }
/// addExpr - Append Expr to Inst, as a plain immediate when it is a constant
/// expression and as an expression operand otherwise.
274 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
275 // Add as immediates when possible.
276 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
277 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
279 Inst.addOperand(MCOperand::CreateExpr(Expr));
/// addRegOperands - Append this register as the single MCInst operand.
282 void addRegOperands(MCInst &Inst, unsigned N) const {
283 assert(N == 1 && "Invalid number of operands!");
284 Inst.addOperand(MCOperand::CreateReg(getReg()));
/// addImmOperands - Append this immediate as the single MCInst operand.
287 void addImmOperands(MCInst &Inst, unsigned N) const {
288 assert(N == 1 && "Invalid number of operands!");
289 addExpr(Inst, getImm());
/// addMemOperands - Append the five MCInst operands of a memory reference, in
/// the order base register, scale, index register, displacement, segment.
292 void addMemOperands(MCInst &Inst, unsigned N) const {
293 assert((N == 5) && "Invalid number of operands!");
294 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
295 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
296 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
297 addExpr(Inst, getMemDisp());
298 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
/// addAbsMemOperands - Append only the displacement. Unlike addExpr, this
/// always creates an expression operand, even for a constant displacement.
301 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
302 assert((N == 1) && "Invalid number of operands!");
303 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
/// CreateToken - Allocate a Token operand that refers to (does not copy) Str;
/// start and end locations are both Loc.
306 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
307 X86Operand *Res = new X86Operand(Token, Loc, Loc);
308 Res->Tok.Data = Str.data();
309 Res->Tok.Length = Str.size();
/// CreateReg - Allocate a Register operand for RegNo.
313 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
314 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
315 Res->Reg.RegNo = RegNo;
/// CreateImm - Allocate an Immediate operand.
319 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
320 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
325 /// Create an absolute memory operand.
// Base and index registers are cleared; only the displacement is meaningful.
326 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
328 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
330 Res->Mem.Disp = Disp;
331 Res->Mem.BaseReg = 0;
332 Res->Mem.IndexReg = 0;
337 /// Create a generalized memory operand.
338 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
339 unsigned BaseReg, unsigned IndexReg,
340 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
341 // We should never just have a displacement, that should be parsed as an
342 // absolute memory operand.
343 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
345 // The scale should always be one of {1,2,4,8}.
346 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
348 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
349 Res->Mem.SegReg = SegReg;
350 Res->Mem.Disp = Disp;
351 Res->Mem.BaseReg = BaseReg;
352 Res->Mem.IndexReg = IndexReg;
353 Res->Mem.Scale = Scale;
358 } // end anonymous namespace.
/// ParseRegister - Parse a register reference beginning at the current '%'
/// token (asserted). StartLoc is set to the '%' location, EndLoc to the
/// location of the register's final token, and RegNo to the matched register.
/// Besides plain names this accepts a lowercase retry, "%st" / "%st(N)" for
/// N in 0..7, and "db0".."db7" as aliases of DR0..DR7. Failures are reported
/// through Error(); callers in this file treat a true result as failure.
// NOTE(review): the embedded line numbers of this listing skip values, so
// several guards, returns and closing braces of this function are not visible.
361 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
362 SMLoc &StartLoc, SMLoc &EndLoc) {
364 const AsmToken &TokPercent = Parser.getTok();
365 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
366 StartLoc = TokPercent.getLoc();
367 Parser.Lex(); // Eat percent token.
369 const AsmToken &Tok = Parser.getTok();
370 if (Tok.isNot(AsmToken::Identifier))
371 return Error(Tok.getLoc(), "invalid register name");
373 // FIXME: Validate register for the current architecture; we have to do
374 // validation later, so maybe there is no need for this here.
375 RegNo = MatchRegisterName(Tok.getString());
377 // If the match failed, try the register name as lowercase.
379 RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
381 // FIXME: This should be done using Requires<In32BitMode> and
382 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
383 // can be also checked.
// %riz is rejected unless the parser is in 64-bit mode.
384 if (RegNo == X86::RIZ && !Is64Bit)
385 return Error(Tok.getLoc(), "riz register in 64-bit mode only");
387 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
388 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
390 EndLoc = Tok.getLoc();
391 Parser.Lex(); // Eat 'st'
393 // Check to see if we have '(4)' after %st.
394 if (getLexer().isNot(AsmToken::LParen))
399 const AsmToken &IntTok = Parser.getTok();
400 if (IntTok.isNot(AsmToken::Integer))
401 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0..7 onto ST0..ST7; anything else is an error.
402 switch (IntTok.getIntVal()) {
403 case 0: RegNo = X86::ST0; break;
404 case 1: RegNo = X86::ST1; break;
405 case 2: RegNo = X86::ST2; break;
406 case 3: RegNo = X86::ST3; break;
407 case 4: RegNo = X86::ST4; break;
408 case 5: RegNo = X86::ST5; break;
409 case 6: RegNo = X86::ST6; break;
410 case 7: RegNo = X86::ST7; break;
411 default: return Error(IntTok.getLoc(), "invalid stack index");
414 if (getParser().Lex().isNot(AsmToken::RParen))
415 return Error(Parser.getTok().getLoc(), "expected ')'");
417 EndLoc = Tok.getLoc();
418 Parser.Lex(); // Eat ')'
422 // If this is "db[0-7]", match it as an alias for dr[0-7].
424 if (RegNo == 0 && Tok.getString().size() == 3 &&
425 Tok.getString().startswith("db")) {
426 switch (Tok.getString()[2]) {
427 case '0': RegNo = X86::DR0; break;
428 case '1': RegNo = X86::DR1; break;
429 case '2': RegNo = X86::DR2; break;
430 case '3': RegNo = X86::DR3; break;
431 case '4': RegNo = X86::DR4; break;
432 case '5': RegNo = X86::DR5; break;
433 case '6': RegNo = X86::DR6; break;
434 case '7': RegNo = X86::DR7; break;
438 EndLoc = Tok.getLoc();
439 Parser.Lex(); // Eat it.
445 return Error(Tok.getLoc(), "invalid register name");
447 EndLoc = Tok.getLoc();
448 Parser.Lex(); // Eat identifier token.
/// ParseOperand - Parse a single operand, dispatching on the current token:
///   '%'   a register, or a segment register followed by ':' that introduces
///         a memory reference;
///   '$'   an immediate expression;
///   and a memory operand with no segment register (the case label for this
///   branch is not visible in this listing).
/// Returns a newly created X86Operand; the visible failure paths return 0.
452 X86Operand *X86ATTAsmParser::ParseOperand() {
453 switch (getLexer().getKind()) {
455 // Parse a memory operand with no segment register.
456 return ParseMemOperand(0, Parser.getTok().getLoc());
457 case AsmToken::Percent: {
458 // Read the register.
461 if (ParseRegister(RegNo, Start, End)) return 0;
// eiz/riz are index-only placeholders and are rejected as plain operands.
462 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
463 Error(Start, "eiz and riz can only be used as index registers");
467 // If this is a segment register followed by a ':', then this is the start
468 // of a memory reference, otherwise this is a normal register reference.
469 if (getLexer().isNot(AsmToken::Colon))
470 return X86Operand::CreateReg(RegNo, Start, End);
473 getParser().Lex(); // Eat the colon.
474 return ParseMemOperand(RegNo, Start);
476 case AsmToken::Dollar: {
478 SMLoc Start = Parser.getTok().getLoc(), End;
481 if (getParser().ParseExpression(Val, End))
483 return X86Operand::CreateImm(Val, Start, End);
488 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
489 /// has already been parsed if present.
/// SegReg is the already-parsed segment register (0 if none) and MemStart the
/// location where the operand began. Returns a newly created memory operand;
/// the visible failure paths return 0 after reporting through Error().
// NOTE(review): the embedded line numbers of this listing skip values, so
// several guards, returns and closing braces of this function are not visible.
490 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
492 // We have to disambiguate a parenthesized expression "(4+5)" from the start
493 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
494 // only way to do this without lookahead is to eat the '(' and see what follows.
// The displacement defaults to the constant 0 when none is written.
496 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
497 if (getLexer().isNot(AsmToken::LParen)) {
499 if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
501 // After parsing the base expression we could either have a parenthesized
502 // memory address or not. If not, return now. If so, eat the (.
503 if (getLexer().isNot(AsmToken::LParen)) {
504 // Unless we have a segment register, treat this as an immediate.
506 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
507 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
513 // Okay, we have a '('. We don't know whether this is an expression or not,
514 // so we have to eat the ( to see beyond it.
515 SMLoc LParenLoc = Parser.getTok().getLoc();
516 Parser.Lex(); // Eat the '('.
518 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
519 // Nothing to do here, fall into the code below with the '(' part of the
520 // memory operand consumed.
524 // It must be a parenthesized expression, parse it now.
525 if (getParser().ParseParenExpression(Disp, ExprEnd))
528 // After parsing the base expression we could either have a parenthesized
529 // memory address or not. If not, return now. If so, eat the (.
530 if (getLexer().isNot(AsmToken::LParen)) {
531 // Unless we have a segment register, treat this as an immediate.
533 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
534 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
542 // If we reached here, then we just ate the ( of the memory operand. Process
543 // the rest of the memory operand.
544 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
546 if (getLexer().is(AsmToken::Percent)) {
548 if (ParseRegister(BaseReg, L, L)) return 0;
// eiz/riz are only meaningful as index registers, never as a base.
549 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
550 Error(L, "eiz and riz can only be used as index registers");
555 if (getLexer().is(AsmToken::Comma)) {
556 Parser.Lex(); // Eat the comma.
558 // Following the comma we should have either an index register, or a scale
559 // value. We don't support the latter form, but we want to parse it
562 // Note that even though it would be completely consistent to support syntax
563 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
564 if (getLexer().is(AsmToken::Percent)) {
566 if (ParseRegister(IndexReg, L, L)) return 0;
568 if (getLexer().isNot(AsmToken::RParen)) {
569 // Parse the scale amount:
570 // ::= ',' [scale-expression]
571 if (getLexer().isNot(AsmToken::Comma)) {
572 Error(Parser.getTok().getLoc(),
573 "expected comma in scale expression");
576 Parser.Lex(); // Eat the comma.
578 if (getLexer().isNot(AsmToken::RParen)) {
579 SMLoc Loc = Parser.getTok().getLoc();
582 if (getParser().ParseAbsoluteExpression(ScaleVal))
585 // Validate the scale amount.
586 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
587 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
590 Scale = (unsigned)ScaleVal;
593 } else if (getLexer().isNot(AsmToken::RParen)) {
594 // A scale amount without an index is ignored.
596 SMLoc Loc = Parser.getTok().getLoc();
599 if (getParser().ParseAbsoluteExpression(Value))
603 Warning(Loc, "scale factor without index register is ignored");
608 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
609 if (getLexer().isNot(AsmToken::RParen)) {
610 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
613 SMLoc MemEnd = Parser.getTok().getLoc();
614 Parser.Lex(); // Eat the ')'.
616 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// ParseInstruction - Parse the operands of one instruction statement.
///
/// \param Name     the mnemonic exactly as written in the source.
/// \param NameLoc  location of the mnemonic.
/// \param Operands output list. The first entry pushed is a Token operand
///                 holding the (possibly rewritten) mnemonic, optionally
///                 followed by a synthesized immediate for the SSE compare /
///                 vpclmul shorthand forms, then the parsed operands.
///
/// The mnemonic is first canonicalized through an alias table, then the
/// comma-separated operands are parsed, and finally a series of per-mnemonic
/// fixups rewrites the operand list into the form the matcher tables expect.
/// Parse errors are reported through Error()/TokError().
// NOTE(review): the embedded line numbers of this listing skip values, so a
// number of statements, returns and closing braces of this function are not
// visible here; the visible lines are kept as they were apart from the fix
// described at the cmov alias table.
620 bool X86ATTAsmParser::
621 ParseInstruction(StringRef Name, SMLoc NameLoc,
622 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
623 // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
624 // represent alternative syntaxes in the .td file, without requiring
625 // instruction duplication.
// StringSwitch returns the result of the FIRST matching case, so every key
// below must appear only once.
626 StringRef PatchedName = StringSwitch<StringRef>(Name)
628 .Case("salb", "shlb")
629 .Case("sall", "shll")
630 .Case("salq", "shlq")
631 .Case("salw", "shlw")
634 .Case("repnz", "repne")
635 .Case("iret", "iretl")
636 .Case("sysret", "sysretl")
640 .Case("cwde", "cwtl")
641 .Case("cdqe", "cltq")
642 .Case("smovb", "movsb")
643 .Case("smovw", "movsw")
644 .Case("smovl", "movsl")
645 .Case("smovq", "movsq")
646 .Case("push", Is64Bit ? "pushq" : "pushl")
647 .Case("pop", Is64Bit ? "popq" : "popl")
648 .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
649 .Case("popf", Is64Bit ? "popfq" : "popfl")
650 .Case("pushfd", "pushfl")
651 .Case("popfd", "popfl")
652 .Case("retl", Is64Bit ? "retl" : "ret")
653 .Case("retq", Is64Bit ? "ret" : "retq")
654 .Case("setz", "sete") .Case("setnz", "setne")
655 .Case("setc", "setb") .Case("setna", "setbe")
656 .Case("setnae", "setb").Case("setnb", "setae")
657 .Case("setnbe", "seta").Case("setnc", "setae")
658 .Case("setng", "setle").Case("setnge", "setl")
659 .Case("setnl", "setge").Case("setnle", "setg")
660 .Case("setpe", "setp") .Case("setpo", "setnp")
661 .Case("jz", "je") .Case("jnz", "jne")
662 .Case("jc", "jb") .Case("jna", "jbe")
663 .Case("jnae", "jb").Case("jnb", "jae")
664 .Case("jnbe", "ja").Case("jnc", "jae")
665 .Case("jng", "jle").Case("jnge", "jl")
666 .Case("jnl", "jge").Case("jnle", "jg")
667 .Case("jpe", "jp") .Case("jpo", "jnp")
668 // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
// There is deliberately no bare "n" condition here: "cmovnl" is the
// unsuffixed "not less" alias and must map to "cmovge", not to the
// 32-bit-only "cmovgel".
669 .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl")
670 .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb")
671 .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl")
672 .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb")
673 .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
674 .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe")
675 .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
676 .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae")
677 .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
678 .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
679 .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
680 .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae")
681 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
682 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle")
687 .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
688 .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
689 .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
690 .Case("cmovnlq", "cmovgeq").Case("cmovnl", "cmovge")
691 .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
692 .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
693 .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
694 .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne")
695 .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel")
696 .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove")
697 // Floating point stack cmov aliases.
698 .Case("fcmovz", "fcmove")
699 .Case("fcmova", "fcmovnbe")
700 .Case("fcmovnae", "fcmovb")
701 .Case("fcmovna", "fcmovbe")
702 .Case("fcmovae", "fcmovnb")
703 .Case("fwait", "wait")
704 .Case("movzx", "movzb") // FIXME: Not correct.
705 .Case("fildq", "fildll")
708 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The comparison code embedded in the mnemonic becomes an extra immediate
// operand and the mnemonic collapses to the plain (v)cmp{ss,sd,ps,pd} form.
709 const MCExpr *ExtraImmOp = 0;
710 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
711 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
712 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
713 bool IsVCMP = PatchedName.startswith("vcmp");
// Skip "cmp" (3 chars) or "vcmp" (4 chars) to reach the comparison code.
714 unsigned SSECCIdx = IsVCMP ? 4 : 3;
715 unsigned SSEComparisonCode = StringSwitch<unsigned>(
716 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
729 .Case("neq_oq", 0x0C)
736 .Case("unord_s", 0x13)
737 .Case("neq_us", 0x14)
738 .Case("nlt_uq", 0x15)
739 .Case("nle_uq", 0x16)
742 .Case("nge_uq", 0x19)
743 .Case("ngt_uq", 0x1A)
744 .Case("false_os", 0x1B)
745 .Case("neq_os", 0x1C)
748 .Case("true_us", 0x1F)
// ~0U marks "no comparison code recognized"; the mnemonic is then left alone.
750 if (SSEComparisonCode != ~0U) {
751 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
752 getParser().getContext());
753 if (PatchedName.endswith("ss")) {
754 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
755 } else if (PatchedName.endswith("sd")) {
756 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
757 } else if (PatchedName.endswith("ps")) {
758 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
760 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
761 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
766 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
767 if (PatchedName.startswith("vpclmul")) {
768 unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
769 PatchedName.slice(7, PatchedName.size() - 2))
770 .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
771 .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
772 .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
773 .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
775 if (CLMULQuadWordSelect != ~0U) {
776 ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
777 getParser().getContext());
778 assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
779 PatchedName = "vpclmulqdq";
// The mnemonic token always comes first in the operand list.
783 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
786 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
789 // Determine whether this is an instruction prefix.
791 PatchedName == "lock" || PatchedName == "rep" ||
792 PatchedName == "repne";
795 // This does the actual operand parsing. Don't parse any more if we have a
796 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
797 // just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
799 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
801 // Parse '*' modifier.
802 if (getLexer().is(AsmToken::Star)) {
803 SMLoc Loc = Parser.getTok().getLoc();
804 Operands.push_back(X86Operand::CreateToken("*", Loc));
805 Parser.Lex(); // Eat the star.
808 // Read the first operand.
809 if (X86Operand *Op = ParseOperand())
810 Operands.push_back(Op);
812 Parser.EatToEndOfStatement();
816 while (getLexer().is(AsmToken::Comma)) {
817 Parser.Lex(); // Eat the comma.
819 // Parse and remember the operand.
820 if (X86Operand *Op = ParseOperand())
821 Operands.push_back(Op);
823 Parser.EatToEndOfStatement();
828 if (getLexer().isNot(AsmToken::EndOfStatement)) {
829 Parser.EatToEndOfStatement();
830 return TokError("unexpected token in argument list");
834 if (getLexer().is(AsmToken::EndOfStatement))
835 Parser.Lex(); // Consume the EndOfStatement
837 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize it by dropping
// the constant-1 shift count operand.
839 if ((Name.startswith("shr") || Name.startswith("sar") ||
840 Name.startswith("shl")) &&
841 Operands.size() == 3) {
842 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
843 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
844 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
846 Operands.erase(Operands.begin() + 1);
850 // FIXME: Hack to recognize "rc[lr] <op>" -> "rcl $1, <op>".
851 if ((Name.startswith("rcl") || Name.startswith("rcr")) &&
852 Operands.size() == 2) {
853 const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
854 Operands.push_back(X86Operand::CreateImm(One, NameLoc, NameLoc));
// Move the synthesized $1 in front of the user-written operand.
855 std::swap(Operands[1], Operands[2]);
858 // FIXME: Hack to recognize "sh[lr]d op,op" -> "shld $1, op,op".
859 if ((Name.startswith("shld") || Name.startswith("shrd")) &&
860 Operands.size() == 3) {
861 const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
862 Operands.insert(Operands.begin()+1,
863 X86Operand::CreateImm(One, NameLoc, NameLoc));
867 // FIXME: Hack to recognize "in[bwl] <op>". Canonicalize it by appending the
// implicit accumulator register (%al, %ax or %eax, chosen by the suffix).
869 if ((Name == "inb" || Name == "inw" || Name == "inl") &&
870 Operands.size() == 2) {
873 Reg = MatchRegisterName("al");
874 else if (Name[2] == 'w')
875 Reg = MatchRegisterName("ax");
877 Reg = MatchRegisterName("eax");
878 SMLoc Loc = Operands.back()->getEndLoc();
879 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
882 // FIXME: Hack to recognize "out[bwl] <op>". Canonicalize it by inserting the
// implicit accumulator register (%al, %ax or %eax) before <op>.
884 if ((Name == "outb" || Name == "outw" || Name == "outl") &&
885 Operands.size() == 2) {
888 Reg = MatchRegisterName("al");
889 else if (Name[3] == 'w')
890 Reg = MatchRegisterName("ax");
892 Reg = MatchRegisterName("eax");
893 SMLoc Loc = Operands.back()->getEndLoc();
894 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
895 std::swap(Operands[1], Operands[2]);
898 // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
899 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
900 Operands.size() == 3) {
901 X86Operand &Op = *(X86Operand*)Operands.back();
// Only a bare "(%dx)" qualifies: no segment, zero displacement, no index.
902 if (Op.isMem() && Op.Mem.SegReg == 0 &&
903 isa<MCConstantExpr>(Op.Mem.Disp) &&
904 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
905 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
906 SMLoc Loc = Op.getEndLoc();
907 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
912 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
913 // "f{mul*,add*,sub*,div*} $op"
914 if ((Name.startswith("fmul") || Name.startswith("fadd") ||
915 Name.startswith("fsub") || Name.startswith("fdiv")) &&
916 Operands.size() == 3 &&
917 static_cast<X86Operand*>(Operands[2])->isReg() &&
918 static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
920 Operands.erase(Operands.begin() + 2);
923 // FIXME: Hack to handle "f{mulp,addp} st(0), $op" the same as
924 // "f{mulp,addp} $op", since they commute. We also allow fdivrp/fsubrp even
925 // though they don't commute, solely because gas does support this.
926 if ((Name=="fmulp" || Name=="faddp" || Name=="fsubrp" || Name=="fdivrp") &&
927 Operands.size() == 3 &&
928 static_cast<X86Operand*>(Operands[1])->isReg() &&
929 static_cast<X86Operand*>(Operands[1])->getReg() == X86::ST0) {
931 Operands.erase(Operands.begin() + 1);
934 // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
// B": the register operand is duplicated at the end of the list.
936 if (Name.startswith("imul") && Operands.size() == 3 &&
937 static_cast<X86Operand*>(Operands[1])->isImm() &&
938 static_cast<X86Operand*>(Operands.back())->isReg()) {
939 X86Operand *Op = static_cast<X86Operand*>(Operands.back());
940 Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
944 // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
945 // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
946 // errors, since its encoding is the most compact.
947 if (Name == "sldt" && Operands.size() == 2 &&
948 static_cast<X86Operand*>(Operands[1])->isMem()) {
950 Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
953 // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
954 // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the
955 // other operand order, swap them.
956 if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
958 if (Operands.size() == 3 &&
959 static_cast<X86Operand*>(Operands[1])->isMem() &&
960 static_cast<X86Operand*>(Operands[2])->isReg()) {
961 std::swap(Operands[1], Operands[2]);
964 // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
965 // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the
966 // other operand order, swap them.
967 if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
969 if (Operands.size() == 3 &&
970 static_cast<X86Operand*>(Operands[1])->isReg() &&
971 static_cast<X86Operand*>(Operands[2])->isMem()) {
972 std::swap(Operands[1], Operands[2]);
975 // The assembler accepts these instructions with no operand as a synonym for
976 // an instruction acting on st(1). e.g. "fxch" -> "fxch %st(1)".
977 if ((Name == "fxch" || Name == "fucom" || Name == "fucomp" ||
978 Name == "faddp" || Name == "fsubp" || Name == "fsubrp" ||
979 Name == "fmulp" || Name == "fdivp" || Name == "fdivrp") &&
980 Operands.size() == 1) {
981 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
985 // The assembler accepts these instructions with too few operands as a synonym
986 // for taking %st(1),%st(0) or X, %st(0).
987 if ((Name == "fcomi" || Name == "fucomi") && Operands.size() < 3) {
988 if (Operands.size() == 1)
989 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
991 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"),
995 // The assembler accepts various amounts of brokenness for fnstsw.
996 if (Name == "fnstsw") {
997 if (Operands.size() == 2 &&
998 static_cast<X86Operand*>(Operands[1])->isReg()) {
999 // "fnstsw al" and "fnstsw eax" -> "fnstsw"
1000 unsigned Reg = static_cast<X86Operand*>(Operands[1])->Reg.RegNo;
1001 if (Reg == MatchRegisterName("eax") ||
1002 Reg == MatchRegisterName("al")) {
1004 Operands.pop_back();
1008 // "fnstsw" -> "fnstsw %ax"
1009 if (Operands.size() == 1)
1010 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("ax"),
1014 // jmp $42,$5 -> ljmp, similarly for call.
1015 if ((Name.startswith("call") || Name.startswith("jmp")) &&
1016 Operands.size() == 3 &&
1017 static_cast<X86Operand*>(Operands[1])->isImm() &&
1018 static_cast<X86Operand*>(Operands[2])->isImm()) {
1019 const char *NewOpName = StringSwitch<const char *>(Name)
1020 .Case("jmp", "ljmp")
1021 .Case("jmpw", "ljmpw")
1022 .Case("jmpl", "ljmpl")
1023 .Case("jmpq", "ljmpq")
1024 .Case("call", "lcall")
1025 .Case("callw", "lcallw")
1026 .Case("calll", "lcalll")
1027 .Case("callq", "lcallq")
1031 Operands[0] = X86Operand::CreateToken(NewOpName, NameLoc);
1036 // lcall and ljmp -> lcalll and ljmpl
1037 if ((Name == "lcall" || Name == "ljmp") && Operands.size() == 3) {
1039 Operands[0] = X86Operand::CreateToken(Name == "lcall" ? "lcalll" : "ljmpl",
1043 // call foo is not ambiguous with callw.
1044 if (Name == "call" && Operands.size() == 2) {
1045 const char *NewName = Is64Bit ? "callq" : "calll";
1047 Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
1051 // movsd -> movsl (when no operands are specified).
1052 if (Name == "movsd" && Operands.size() == 1) {
1054 Operands[0] = X86Operand::CreateToken("movsl", NameLoc);
1057 // fstp <mem> -> fstps <mem>. Without this, we'll default to fstpl due to
1058 // suffix searching.
1059 if (Name == "fstp" && Operands.size() == 2 &&
1060 static_cast<X86Operand*>(Operands[1])->isMem()) {
1062 Operands[0] = X86Operand::CreateToken("fstps", NameLoc);
1066 // "clr <reg>" -> "xor <reg>, <reg>".
1067 if ((Name == "clrb" || Name == "clrw" || Name == "clrl" || Name == "clrq" ||
1068 Name == "clr") && Operands.size() == 2 &&
1069 static_cast<X86Operand*>(Operands[1])->isReg()) {
1070 unsigned RegNo = static_cast<X86Operand*>(Operands[1])->getReg();
1071 Operands.push_back(X86Operand::CreateReg(RegNo, NameLoc, NameLoc));
1073 Operands[0] = X86Operand::CreateToken("xor", NameLoc);
/// ParseDirective - Handle target-specific directives. The only directive
/// recognized in the visible code is ".word", which is parsed as a sequence of
/// 2-byte values.
// NOTE(review): the return for unrecognized directives is not visible in this
// listing.
1079 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
1080 StringRef IDVal = DirectiveID.getIdentifier();
1081 if (IDVal == ".word")
1082 return ParseDirectiveWord(2, DirectiveID.getLoc());
1086 /// ParseDirectiveWord
1087 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expression list of the directive and emits every parsed
/// expression through the streamer.
///
/// \param Size - forwarded unchanged as the size argument of EmitValue.
/// \param L - location used for the "unexpected token" diagnostic.
///
/// An empty operand list (the statement ends immediately) skips the parsing
/// code entirely.
1088 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
1089 if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Parse one expression and hand it to the streamer; Size and address space 0
// are the remaining EmitValue arguments.
1091 const MCExpr *Value;
1092 if (getParser().ParseExpression(Value))
1095 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
1097 if (getLexer().is(AsmToken::EndOfStatement))
// After an expression, any token other than end-of-statement or a comma is
// an error. The error is reported at L (the location supplied by the caller),
// not at the offending token.
1100 // FIXME: Improve diagnostic.
1101 if (getLexer().isNot(AsmToken::Comma))
1102 return Error(L, "unexpected token in directive");
/// MatchAndEmitInstruction - Match a parsed operand list against the
/// generated instruction matcher and emit the result.
///
/// Operands must be non-empty and Operands[0] must be the mnemonic token;
/// both conditions are asserted. The visible code proceeds in three stages:
///
///  1. Wait-prefixed x87 aliases (fstsw, fstcw, finit, fsave, fstenv) first
///     emit a separate WAIT instruction, then have their mnemonic token
///     replaced by the corresponding "fn..." spelling.
///  2. A direct match is tried with MatchInstructionImpl; on success the
///     instruction is emitted, and a missing-feature result is diagnosed.
///  3. Otherwise the mnemonic is retried with each of the suffixes 'b', 'w',
///     'l' and 'q'. Exactly one successful suffix match is emitted. More than
///     one success is reported as an ambiguity, listing the candidates. With
///     no success, the diagnostic is chosen from the failure kinds.
///
/// \param IDLoc - location of the mnemonic, used for most diagnostics.
/// \param Operands - parsed operands; element 0 may be replaced (alias
///   rewrite) or temporarily re-pointed (suffix search) by this function.
///
/// NOTE(review): this excerpt omits several lines of the function (including
/// the remaining parameter, some declarations and the return statements), so
/// the exact return value on each path should be confirmed against the full
/// file.
1112 bool X86ATTAsmParser::
1113 MatchAndEmitInstruction(SMLoc IDLoc,
1114 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1116 assert(!Operands.empty() && "Unexpect empty operand list!");
1117 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
1118 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
1120 // First, handle aliases that expand to multiple instructions.
1121 // FIXME: This should be replaced with a real .td file alias mechanism.
1122 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
1123 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
1124 Op->getToken() == "fstenv") {
// Emit a standalone WAIT first, then map the mnemonic to its "fn..."
// spelling so the ordinary matching below handles the rest.
1126 Inst.setOpcode(X86::WAIT);
1127 Out.EmitInstruction(Inst);
1130 StringSwitch<const char*>(Op->getToken())
1131 .Case("finit", "fninit")
1132 .Case("fsave", "fnsave")
1133 .Case("fstcw", "fnstcw")
1134 .Case("fstenv", "fnstenv")
1135 .Case("fstsw", "fnstsw")
1137 assert(Repl && "Unknown wait-prefixed instruction");
1139 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Records that the unsuffixed match failed on an operand rather than on the
// mnemonic; it selects the diagnostic when every suffixed retry reports a
// mnemonic failure.
1143 bool WasOriginallyInvalidOperand = false;
1144 unsigned OrigErrorInfo;
1147 // First, try a direct match.
1148 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
1150 Out.EmitInstruction(Inst);
1152 case Match_MissingFeature:
1153 Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1155 case Match_InvalidOperand:
1156 WasOriginallyInvalidOperand = true;
1158 case Match_MnemonicFail:
1162 // FIXME: Ideally, we would only attempt suffix matches for things which are
1163 // valid prefixes, and we could just infer the right unambiguous
1164 // type. However, that requires substantially more matcher support than the
1167 // Change the operand to point to a temporary token.
1168 StringRef Base = Op->getToken();
1169 SmallString<16> Tmp;
1172 Op->setTokenValue(Tmp.str());
1174 // Check for the various suffix matches.
// Each retry overwrites the character at index Base.size() of Tmp with the
// next candidate suffix and re-runs the matcher on the same operand list.
// NOTE(review): this relies on Tmp already holding Base plus one extra
// character, which is set up on lines not shown in this excerpt.
1175 Tmp[Base.size()] = 'b';
1176 unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
1177 MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
1178 Tmp[Base.size()] = 'w';
1179 MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
1180 Tmp[Base.size()] = 'l';
1181 MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
1182 Tmp[Base.size()] = 'q';
1183 MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
1185 // Restore the old token.
1186 Op->setTokenValue(Base);
1188 // If exactly one matched, then we treat that as a successful match (and the
1189 // instruction will already have been filled in correctly, since the failing
1190 // matches won't have modified it).
1191 unsigned NumSuccessfulMatches =
1192 (MatchB == Match_Success) + (MatchW == Match_Success) +
1193 (MatchL == Match_Success) + (MatchQ == Match_Success);
1194 if (NumSuccessfulMatches == 1) {
1195 Out.EmitInstruction(Inst);
1199 // Otherwise, the match failed, try to produce a decent error message.
1201 // If we had multiple suffix matches, then identify this as an ambiguous
1203 if (NumSuccessfulMatches > 1) {
// Collect the suffix letters that matched, in b/w/l/q order, so the message
// can spell out each candidate mnemonic.
1205 unsigned NumMatches = 0;
1206 if (MatchB == Match_Success)
1207 MatchChars[NumMatches++] = 'b';
1208 if (MatchW == Match_Success)
1209 MatchChars[NumMatches++] = 'w';
1210 if (MatchL == Match_Success)
1211 MatchChars[NumMatches++] = 'l';
1212 if (MatchQ == Match_Success)
1213 MatchChars[NumMatches++] = 'q';
1215 SmallString<126> Msg;
1216 raw_svector_ostream OS(Msg);
1217 OS << "ambiguous instructions require an explicit suffix (could be ";
1218 for (unsigned i = 0; i != NumMatches; ++i) {
1221 if (i + 1 == NumMatches)
1223 OS << "'" << Base << MatchChars[i] << "'";
1226 Error(IDLoc, OS.str());
1230 // Okay, we know that none of the variants matched successfully.
1232 // If all of the instructions reported an invalid mnemonic, then the original
1233 // mnemonic was invalid.
1234 if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
1235 (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
1236 if (!WasOriginallyInvalidOperand) {
1237 Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
1241 // Recover location info for the operand if we know which was the problem.
1242 SMLoc ErrorLoc = IDLoc;
// OrigErrorInfo was filled in by the unsuffixed match above. The value ~0U is
// treated as "no operand index available"; an index past the end of Operands
// is reported as too few operands.
1243 if (OrigErrorInfo != ~0U) {
1244 if (OrigErrorInfo >= Operands.size())
1245 return Error(IDLoc, "too few operands for instruction");
1247 ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
1248 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
1251 return Error(ErrorLoc, "invalid operand for instruction");
// The two checks below add the bool comparison results as integers, counting
// how many of the four suffix variants failed with the given result kind.
1254 // If one instruction matched with a missing feature, report this as a
1256 if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
1257 (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
1258 Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1262 // If one instruction matched with an invalid operand, report this as an
1264 if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
1265 (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
1266 Error(IDLoc, "invalid operand for instruction");
1270 // If all of these were an outright failure, report it in a useless way.
1271 // FIXME: We should give nicer diagnostics about the exact failure.
1272 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
// Forward declaration of the lexer initializer so that the parser initializer
// below can call it; its definition is not part of this excerpt.
1277 extern "C" void LLVMInitializeX86AsmLexer();
1279 // Force static initialization.
// Entry point with C linkage: associates X86_32ATTAsmParser with
// TheX86_32Target and X86_64ATTAsmParser with TheX86_64Target, then runs the
// lexer initializer.
// NOTE(review): registration presumably happens in the constructors of the
// local RegisterAsmParser objects -- confirm against TargetRegistry.h.
1280 extern "C" void LLVMInitializeX86AsmParser() {
1281 RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1282 RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1283 LLVMInitializeX86AsmLexer();
1286 #define GET_REGISTER_MATCHER
1287 #define GET_MATCHER_IMPLEMENTATION
1288 #include "X86GenAsmMatcher.inc"