1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
// X86AsmParser - X86 target assembly parser, layered on MCTargetAsmParser.
// NOTE(review): this excerpt omits some lines of the class (e.g. access
// specifiers, a few member declarations and closing braces).
36 class X86AsmParser : public MCTargetAsmParser {
// Info for the instruction being parsed; the constructor initializes it to 0.
39 ParseInstructionInfo *InstInfo;
// Accessors for the generic parser and its lexer.
41 MCAsmParser &getParser() const { return Parser; }
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Error - Report Msg at location L through the generic parser and return its
// result. When MatchingInlineAsm is set, nothing is reported and true is
// returned immediately.
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
// ErrorOperand - NOTE(review): body not visible in this excerpt.
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand parsers. ParseOperand() picks the Intel or AT&T variant based on
// isParsingIntelSyntax().
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc SizeDirLoc,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
67 StringRef &Identifier);
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference encountered while parsing inline
// assembly (the final parameter line is not visible in this excerpt).
70 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
71 unsigned BaseReg, unsigned IndexReg,
72 unsigned Scale, SMLoc Start, SMLoc End,
73 SMLoc SizeDirLoc, unsigned Size,
// Handles the Intel dot operator (e.g. [ebx].foo.bar); a true return is
// treated by the caller as failure, with the message left in Err.
76 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
77 SmallString<64> &Err);
79 bool ParseDirectiveWord(unsigned Size, SMLoc L);
80 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
82 bool processInstruction(MCInst &Inst,
83 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
85 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
86 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
87 MCStreamer &Out, unsigned &ErrorInfo,
88 bool MatchingInlineAsm);
90 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
91 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
92 bool isSrcOp(X86Operand &Op);
94 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
95 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
96 bool isDstOp(X86Operand &Op);
// is64BitMode - True when the subtarget's feature bits include Mode64Bit.
98 bool is64BitMode() const {
99 // FIXME: Can tablegen auto-generate this?
100 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// Toggle the Mode64Bit feature bit and recompute the available-feature set.
// NOTE(review): the header of the enclosing function is not visible here.
103 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
104 setAvailableFeatures(FB);
107 /// @name Auto-generated Matcher Functions
110 #define GET_ASSEMBLER_HEADER
111 #include "X86GenAsmMatcher.inc"
// Constructor - Binds the subtarget info and generic parser, clears InstInfo,
// and seeds the available-feature set from the subtarget's feature bits.
116 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
117 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
119 // Initialize the set of available features.
120 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Virtual parsing entry points declared by this class.
122 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
124 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
126 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
128 virtual bool ParseDirective(AsmToken DirectiveID);
// isParsingIntelSyntax - True when the parser's assembler dialect is nonzero.
130 bool isParsingIntelSyntax() {
131 return getParser().getAssemblerDialect();
134 } // end anonymous namespace
136 /// @name Auto-generated Match Functions
139 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Return true if \p Value (the raw 64-bit pattern of a
/// constant) is acceptable as a 16-bit immediate encoded as a sign-extended
/// 8-bit immediate.
///
/// Accepted ranges:
///   - [0, 0x7F]: non-negative values that fit in 7 bits;
///   - [0xFF80, 0xFFFF]: negative 8-bit values sign-extended to 16 bits;
///   - [0xFFFFFFFFFFFFFF80, UINT64_MAX]: negative 8-bit values sign-extended
///     to 64 bits.
static bool isImmSExti16i8Value(uint64_t Value) {
  // The upper bound of the last range is UINT64_MAX, which every uint64_t
  // satisfies, so only the lower bound is tested.
  return Value <= 0x000000000000007FULL ||
         (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL) ||
         0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmSExti32i8Value - Return true if \p Value (the raw 64-bit pattern of a
/// constant) is acceptable as a 32-bit immediate encoded as a sign-extended
/// 8-bit immediate.
///
/// Accepted ranges:
///   - [0, 0x7F]: non-negative values that fit in 7 bits;
///   - [0xFFFFFF80, 0xFFFFFFFF]: negative 8-bit values sign-extended to
///     32 bits;
///   - [0xFFFFFFFFFFFFFF80, UINT64_MAX]: negative 8-bit values sign-extended
///     to 64 bits.
static bool isImmSExti32i8Value(uint64_t Value) {
  // The upper bound of the last range is UINT64_MAX, which every uint64_t
  // satisfies, so only the lower bound is tested.
  return Value <= 0x000000000000007FULL ||
         (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL) ||
         0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmZExtu32u8Value - Return true if \p Value fits in an unsigned 8-bit
/// immediate, i.e. lies in [0, 0xFF].
static bool isImmZExtu32u8Value(uint64_t Value) {
  return Value <= 0x00000000000000FFULL;
}
/// isImmSExti64i8Value - Return true if \p Value (the raw 64-bit pattern of a
/// constant) is acceptable as a 64-bit immediate encoded as a sign-extended
/// 8-bit immediate.
///
/// Accepted ranges:
///   - [0, 0x7F]: non-negative values that fit in 7 bits;
///   - [0xFFFFFFFFFFFFFF80, UINT64_MAX]: negative 8-bit values sign-extended
///     to 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  // The upper bound of the second range is UINT64_MAX, which every uint64_t
  // satisfies, so only the lower bound is tested.
  return Value <= 0x000000000000007FULL ||
         0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmSExti64i32Value - Return true if \p Value (the raw 64-bit pattern of
/// a constant) is acceptable as a 64-bit immediate encoded as a sign-extended
/// 32-bit immediate.
///
/// Accepted ranges:
///   - [0, 0x7FFFFFFF]: non-negative values that fit in 31 bits;
///   - [0xFFFFFFFF80000000, UINT64_MAX]: negative 32-bit values sign-extended
///     to 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  // The upper bound of the second range is UINT64_MAX, which every uint64_t
  // satisfies, so only the lower bound is tested.
  return Value <= 0x000000007FFFFFFFULL ||
         0xFFFFFFFF80000000ULL <= Value;
}
170 /// X86Operand - Instances of this class represent a parsed X86 machine
/// instruction operand. The accessors of this struct distinguish Token,
/// Register, Immediate and Memory kinds.
// NOTE(review): the kind enum, the payload members and several other lines of
// this struct are not visible in this excerpt.
172 struct X86Operand : public MCParsedAsmOperand {
// Source range covered by the operand.
180 SMLoc StartLoc, EndLoc;
// Constructor - Records the operand kind and its source range; the
// kind-specific payload is filled in by the static Create* factories.
214 X86Operand(KindTy K, SMLoc Start, SMLoc End)
215 : Kind(K), StartLoc(Start), EndLoc(End) {}
// getSymName - Symbol name stored by the factories that accept one.
217 StringRef getSymName() { return SymName; }
219 /// getStartLoc - Get the location of the first token of this operand.
220 SMLoc getStartLoc() const { return StartLoc; }
221 /// getEndLoc - Get the location of the last token of this operand.
222 SMLoc getEndLoc() const { return EndLoc; }
223 /// getLocRange - Get the range between the first and last token of this
/// operand, as SMRange(StartLoc, EndLoc).
225 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
226 /// getOffsetOfLoc - Get the location of the offset operator.
227 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
// print - Intentionally empty override; nothing is written to OS.
229 virtual void print(raw_ostream &OS) const {}
// Kind-specific accessors. Each one asserts that the operand has the matching
// kind before touching the kind-specific data.

// getToken - The token text as a (pointer, length) view.
231 StringRef getToken() const {
232 assert(Kind == Token && "Invalid access!");
233 return StringRef(Tok.Data, Tok.Length);
// setTokenValue - Repoint the token at Value's storage. Only the pointer and
// length are stored (no copy), so Value's buffer must stay alive while this
// operand is in use.
235 void setTokenValue(StringRef Value) {
236 assert(Kind == Token && "Invalid access!");
237 Tok.Data = Value.data();
238 Tok.Length = Value.size();
// NOTE(review): the return statements of the accessors below are not visible
// in this excerpt.
241 unsigned getReg() const {
242 assert(Kind == Register && "Invalid access!");
246 const MCExpr *getImm() const {
247 assert(Kind == Immediate && "Invalid access!");
251 const MCExpr *getMemDisp() const {
252 assert(Kind == Memory && "Invalid access!");
255 unsigned getMemSegReg() const {
256 assert(Kind == Memory && "Invalid access!");
259 unsigned getMemBaseReg() const {
260 assert(Kind == Memory && "Invalid access!");
263 unsigned getMemIndexReg() const {
264 assert(Kind == Memory && "Invalid access!");
267 unsigned getMemScale() const {
268 assert(Kind == Memory && "Invalid access!");
// Kind predicates.
272 bool isToken() const {return Kind == Token; }
274 bool isImm() const { return Kind == Immediate; }
// Immediate range predicates. When the immediate is a constant expression,
// each one forwards the constant's value to the isImm*Value() helper of the
// same name.
// NOTE(review): the guard lines of these bodies (the checks performed before
// and after the dyn_cast) are not visible in this excerpt.
276 bool isImmSExti16i8() const {
280 // If this isn't a constant expr, just assume it fits and let relaxation
282 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
286 // Otherwise, check the value is in a range that makes sense for this
288 return isImmSExti16i8Value(CE->getValue());
290 bool isImmSExti32i8() const {
294 // If this isn't a constant expr, just assume it fits and let relaxation
296 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
300 // Otherwise, check the value is in a range that makes sense for this
302 return isImmSExti32i8Value(CE->getValue());
304 bool isImmZExtu32u8() const {
308 // If this isn't a constant expr, just assume it fits and let relaxation
310 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
314 // Otherwise, check the value is in a range that makes sense for this
316 return isImmZExtu32u8Value(CE->getValue());
318 bool isImmSExti64i8() const {
322 // If this isn't a constant expr, just assume it fits and let relaxation
324 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
328 // Otherwise, check the value is in a range that makes sense for this
330 return isImmSExti64i8Value(CE->getValue());
332 bool isImmSExti64i32() const {
336 // If this isn't a constant expr, just assume it fits and let relaxation
338 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
342 // Otherwise, check the value is in a range that makes sense for this
344 return isImmSExti64i32Value(CE->getValue());
// isOffsetOf - True when an offset-operator location has been recorded, i.e.
// OffsetOfLoc holds a non-null pointer.
347 bool isOffsetOf() const {
348 return OffsetOfLoc.getPointer();
// needAddressOf - NOTE(review): body not visible in this excerpt.
351 bool needAddressOf() const {
// Memory-size predicates. A memory operand matches isMemN when its recorded
// size is either unspecified (Mem.Size == 0) or exactly N.
355 bool isMem() const { return Kind == Memory; }
356 bool isMem8() const {
357 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
359 bool isMem16() const {
360 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
362 bool isMem32() const {
363 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
365 bool isMem64() const {
366 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
368 bool isMem80() const {
369 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
371 bool isMem128() const {
372 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
374 bool isMem256() const {
375 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Vector-index memory predicates. Same size rule as above, and the index
// register must additionally lie in XMM0..XMM15 (VX) or YMM0..YMM15 (VY).
378 bool isMemVX32() const {
379 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
380 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
382 bool isMemVY32() const {
383 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
384 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
386 bool isMemVX64() const {
387 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
388 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
390 bool isMemVY64() const {
391 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
392 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// isAbsMem - True for a memory operand with no segment, base or index
// register and a scale of 1, i.e. only a displacement.
395 bool isAbsMem() const {
396 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
397 !getMemIndexReg() && getMemScale() == 1;
400 bool isReg() const { return Kind == Register; }
// addExpr - Append Expr to Inst as an operand: constant expressions are added
// as plain immediates; the remaining add below handles other expressions
// (the line separating the two adds is not visible in this excerpt).
402 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
403 // Add as immediates when possible.
404 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
405 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
407 Inst.addOperand(MCOperand::CreateExpr(Expr));
// addRegOperands - Append this operand's register as one MCInst operand.
410 void addRegOperands(MCInst &Inst, unsigned N) const {
411 assert(N == 1 && "Invalid number of operands!");
412 Inst.addOperand(MCOperand::CreateReg(getReg()));
// addImmOperands - Append this operand's immediate via addExpr().
415 void addImmOperands(MCInst &Inst, unsigned N) const {
416 assert(N == 1 && "Invalid number of operands!");
417 addExpr(Inst, getImm());
// Size- and vector-specific variants: all of them forward unchanged to
// addMemOperands().
420 void addMem8Operands(MCInst &Inst, unsigned N) const {
421 addMemOperands(Inst, N);
423 void addMem16Operands(MCInst &Inst, unsigned N) const {
424 addMemOperands(Inst, N);
426 void addMem32Operands(MCInst &Inst, unsigned N) const {
427 addMemOperands(Inst, N);
429 void addMem64Operands(MCInst &Inst, unsigned N) const {
430 addMemOperands(Inst, N);
432 void addMem80Operands(MCInst &Inst, unsigned N) const {
433 addMemOperands(Inst, N);
435 void addMem128Operands(MCInst &Inst, unsigned N) const {
436 addMemOperands(Inst, N);
438 void addMem256Operands(MCInst &Inst, unsigned N) const {
439 addMemOperands(Inst, N);
441 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
442 addMemOperands(Inst, N);
444 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
445 addMemOperands(Inst, N);
447 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
448 addMemOperands(Inst, N);
450 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
451 addMemOperands(Inst, N);
// addMemOperands - Append the five MCInst operands of a memory reference, in
// this order: base register, scale, index register, displacement, segment
// register.
454 void addMemOperands(MCInst &Inst, unsigned N) const {
455 assert((N == 5) && "Invalid number of operands!");
456 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
457 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
458 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
459 addExpr(Inst, getMemDisp());
460 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// addAbsMemOperands - Append only the displacement as a single operand,
// as an immediate when it is a constant expression (the line separating the
// two adds is not visible in this excerpt).
463 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
464 assert((N == 1) && "Invalid number of operands!");
465 // Add as immediates when possible.
466 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
467 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
469 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Static factories. Each allocates a new X86Operand with `new` and fills in
// the kind-specific payload.
// NOTE(review): the return statements and a few assignments/parameter lines
// are not visible in this excerpt.

// CreateToken - Token operand covering [Loc, Loc + Str.size()). The operand
// stores Str's pointer and length, not a copy of the characters.
472 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
473 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
474 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
475 Res->Tok.Data = Str.data();
476 Res->Tok.Length = Str.size();
// CreateReg - Register operand. AddressOf, OffsetOfLoc and SymName are
// optional and are stored on the operand as given.
480 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
481 bool AddressOf = false,
482 SMLoc OffsetOfLoc = SMLoc(),
483 StringRef SymName = StringRef()) {
484 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
485 Res->Reg.RegNo = RegNo;
486 Res->AddressOf = AddressOf;
487 Res->OffsetOfLoc = OffsetOfLoc;
488 Res->SymName = SymName;
// CreateImm - Immediate operand for the expression Val.
492 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
493 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
498 /// Create an absolute memory operand.
// Base and index registers are set to 0; AddressOf is cleared.
499 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
501 StringRef SymName = StringRef()) {
502 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
504 Res->Mem.Disp = Disp;
505 Res->Mem.BaseReg = 0;
506 Res->Mem.IndexReg = 0;
508 Res->Mem.Size = Size;
509 Res->SymName = SymName;
510 Res->AddressOf = false;
514 /// Create a generalized memory operand.
// Asserts that at least one of SegReg/BaseReg/IndexReg is nonzero and that
// Scale is 1, 2, 4 or 8.
515 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
516 unsigned BaseReg, unsigned IndexReg,
517 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
519 StringRef SymName = StringRef()) {
520 // We should never just have a displacement, that should be parsed as an
521 // absolute memory operand.
522 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
524 // The scale should always be one of {1,2,4,8}.
525 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
527 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
528 Res->Mem.SegReg = SegReg;
529 Res->Mem.Disp = Disp;
530 Res->Mem.BaseReg = BaseReg;
531 Res->Mem.IndexReg = IndexReg;
532 Res->Mem.Scale = Scale;
533 Res->Mem.Size = Size;
534 Res->SymName = SymName;
535 Res->AddressOf = false;
540 } // end anonymous namespace.
// isSrcOp - True when Op is a memory operand whose base register is RSI
// (64-bit mode) or ESI (otherwise), whose segment is unset or DS, whose
// displacement is the constant 0, and which has no index register.
542 bool X86AsmParser::isSrcOp(X86Operand &Op) {
543 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
// isa<> guards the cast<> on the next line, so the displacement is only read
// as a constant when it really is one.
545 return (Op.isMem() &&
546 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
547 isa<MCConstantExpr>(Op.Mem.Disp) &&
548 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
549 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
// isDstOp - Destination counterpart of isSrcOp: the base register must be
// RDI (64-bit mode) or EDI (otherwise), the segment unset or ES, the
// displacement the constant 0, and there must be no index register.
// NOTE(review): the line that begins the return expression is not visible in
// this excerpt.
552 bool X86AsmParser::isDstOp(X86Operand &Op) {
553 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
556 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
557 isa<MCConstantExpr>(Op.Mem.Disp) &&
558 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
559 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
// ParseRegister - Parse a register name starting at the current token.
//
// RegNo receives the matched register; StartLoc and EndLoc are set to the
// source range of the tokens examined. Callers in this file treat a false
// return as success. On the failure paths visible here, AT&T syntax reports
// "invalid register name" through Error(), while Intel syntax returns true
// without a diagnostic.
// NOTE(review): several control-flow lines of this function (closing braces,
// some conditions and returns) are not visible in this excerpt.
562 bool X86AsmParser::ParseRegister(unsigned &RegNo,
563 SMLoc &StartLoc, SMLoc &EndLoc) {
565 const AsmToken &PercentTok = Parser.getTok();
566 StartLoc = PercentTok.getLoc();
568 // If we encounter a %, ignore it. This code handles registers with and
569 // without the prefix, unprefixed registers can occur in cfi directives.
570 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
571 Parser.Lex(); // Eat percent token.
573 const AsmToken &Tok = Parser.getTok();
574 EndLoc = Tok.getEndLoc();
// Anything other than an identifier cannot be a register name.
576 if (Tok.isNot(AsmToken::Identifier)) {
577 if (isParsingIntelSyntax()) return true;
578 return Error(StartLoc, "invalid register name",
579 SMRange(StartLoc, EndLoc));
582 RegNo = MatchRegisterName(Tok.getString());
584 // If the match failed, try the register name as lowercase.
586 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
588 if (!is64BitMode()) {
589 // FIXME: This should be done using Requires<In32BitMode> and
590 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
592 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
594 if (RegNo == X86::RIZ ||
595 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
596 X86II::isX86_64NonExtLowByteReg(RegNo) ||
597 X86II::isX86_64ExtendedReg(RegNo))
598 return Error(StartLoc, "register %"
599 + Tok.getString() + " is only available in 64-bit mode",
600 SMRange(StartLoc, EndLoc));
603 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
604 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
606 Parser.Lex(); // Eat 'st'
608 // Check to see if we have '(4)' after %st.
609 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer token in 0..7; it selects ST0..ST7.
614 const AsmToken &IntTok = Parser.getTok();
615 if (IntTok.isNot(AsmToken::Integer))
616 return Error(IntTok.getLoc(), "expected stack index");
617 switch (IntTok.getIntVal()) {
618 case 0: RegNo = X86::ST0; break;
619 case 1: RegNo = X86::ST1; break;
620 case 2: RegNo = X86::ST2; break;
621 case 3: RegNo = X86::ST3; break;
622 case 4: RegNo = X86::ST4; break;
623 case 5: RegNo = X86::ST5; break;
624 case 6: RegNo = X86::ST6; break;
625 case 7: RegNo = X86::ST7; break;
626 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() advances past the index; the token after it must be ')'.
629 if (getParser().Lex().isNot(AsmToken::RParen))
630 return Error(Parser.getTok().getLoc(), "expected ')'");
632 EndLoc = Parser.getTok().getEndLoc();
633 Parser.Lex(); // Eat ')'
637 EndLoc = Parser.getTok().getEndLoc();
639 // If this is "db[0-7]", match it as an alias
// The third character selects the matching DR0..DR7 register.
641 if (RegNo == 0 && Tok.getString().size() == 3 &&
642 Tok.getString().startswith("db")) {
643 switch (Tok.getString()[2]) {
644 case '0': RegNo = X86::DR0; break;
645 case '1': RegNo = X86::DR1; break;
646 case '2': RegNo = X86::DR2; break;
647 case '3': RegNo = X86::DR3; break;
648 case '4': RegNo = X86::DR4; break;
649 case '5': RegNo = X86::DR5; break;
650 case '6': RegNo = X86::DR6; break;
651 case '7': RegNo = X86::DR7; break;
655 EndLoc = Parser.getTok().getEndLoc();
656 Parser.Lex(); // Eat it.
// Failure exit: silent for Intel syntax, diagnosed for AT&T syntax.
// NOTE(review): the condition guarding this exit is not visible here.
662 if (isParsingIntelSyntax()) return true;
663 return Error(StartLoc, "invalid register name",
664 SMRange(StartLoc, EndLoc));
667 Parser.Lex(); // Eat identifier token.
// ParseOperand - Parse one operand, using the Intel-syntax parser when
// isParsingIntelSyntax() is true and the AT&T-syntax parser otherwise.
671 X86Operand *X86AsmParser::ParseOperand() {
672 if (isParsingIntelSyntax())
673 return ParseIntelOperand();
674 return ParseATTOperand();
677 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword, spelled entirely in upper case or entirely in
/// lower case, to the size value used for memory operands (BYTE -> 8,
/// WORD -> 16, ..., YMMWORD -> 256).
// NOTE(review): the end of the StringSwitch chain (the result for an
// unrecognized keyword) and the return are not visible in this excerpt.
678 static unsigned getIntelMemOperandSize(StringRef OpStr) {
679 unsigned Size = StringSwitch<unsigned>(OpStr)
680 .Cases("BYTE", "byte", 8)
681 .Cases("WORD", "word", 16)
682 .Cases("DWORD", "dword", 32)
683 .Cases("QWORD", "qword", 64)
684 .Cases("XWORD", "xword", 80)
685 .Cases("XMMWORD", "xmmword", 128)
686 .Cases("YMMWORD", "ymmword", 256)
691 enum InfixCalculatorTok {
701 static const char OpPrecedence[] = {
// InfixCalculator - Evaluates an integer expression that is fed to it one
// token at a time in infix order. Operands are appended directly to
// PostfixStack; operators go through InfixOperatorStack, where
// pushOperator() uses OpPrecedence to decide when stacked operators are
// moved over to PostfixStack. The evaluation code at the end of the class
// then walks PostfixStack with a separate operand stack.
// NOTE(review): many lines of this class (loop headers, the switch over the
// operator kind, the header of the evaluation routine, closing braces) are
// not visible in this excerpt.
712 class InfixCalculator {
713 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
714 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
715 SmallVector<ICToken, 4> PostfixStack;
// popOperand - Remove the most recently pushed postfix entry, which must be
// an operand. (The return statement is not visible in this excerpt.)
718 int64_t popOperand() {
719 assert (!PostfixStack.empty() && "Poped an empty stack!");
720 ICToken Op = PostfixStack.pop_back_val();
721 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
722 && "Expected and immediate or register!");
// pushOperand - Append an immediate or register operand. Val defaults to 0,
// so operands pushed without a value contribute 0 to the arithmetic.
725 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
726 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
727 "Unexpected operand!");
728 PostfixStack.push_back(std::make_pair(Op, Val));
// popOperator - Discard the operator on top of the operator stack.
731 void popOperator() { InfixOperatorStack.pop_back_val(); }
// pushOperator - Insert Op, first moving stacked operators to PostfixStack as
// required by precedence and parentheses.
732 void pushOperator(InfixCalculatorTok Op) {
733 // Push the new operator if the stack is empty.
734 if (InfixOperatorStack.empty()) {
735 InfixOperatorStack.push_back(Op);
739 // Push the new operator if it has a higher precedence than the operator on
740 // the top of the stack or the operator on the top of the stack is a left
742 unsigned Idx = InfixOperatorStack.size() - 1;
743 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
744 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
745 InfixOperatorStack.push_back(Op);
749 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding the operator stack.
// NOTE(review): the statements that update it are not visible here.
751 unsigned ParenCount = 0;
753 // Nothing to process.
754 if (InfixOperatorStack.empty())
757 Idx = InfixOperatorStack.size() - 1;
758 StackOp = InfixOperatorStack[Idx];
759 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
762 // If we have an even parentheses count and we see a left parentheses,
763 // then stop processing.
764 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; any other operator is
// moved to PostfixStack.
767 if (StackOp == IC_RPAREN) {
769 InfixOperatorStack.pop_back_val();
770 } else if (StackOp == IC_LPAREN) {
772 InfixOperatorStack.pop_back_val();
774 InfixOperatorStack.pop_back_val();
775 PostfixStack.push_back(std::make_pair(StackOp, 0));
778 // Push the new operator.
779 InfixOperatorStack.push_back(Op);
// Evaluation. NOTE(review): the header of this routine is not visible here;
// IntelBracExprStateMachine invokes it as IC.execute().
782 // Push any remaining operators onto the postfix stack.
783 while (!InfixOperatorStack.empty()) {
784 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
785 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
786 PostfixStack.push_back(std::make_pair(StackOp, 0));
789 if (PostfixStack.empty())
// Walk the postfix sequence: operands are stacked, and each operator pops two
// operands (Op2 first, then Op1) and pushes the result as an immediate.
792 SmallVector<ICToken, 16> OperandStack;
793 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
794 ICToken Op = PostfixStack[i];
795 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
796 OperandStack.push_back(Op);
798 assert (OperandStack.size() > 1 && "Too few operands.");
800 ICToken Op2 = OperandStack.pop_back_val();
801 ICToken Op1 = OperandStack.pop_back_val();
804 report_fatal_error("Unexpected operator!");
807 Val = Op1.second + Op2.second;
808 OperandStack.push_back(std::make_pair(IC_IMM, Val));
811 Val = Op1.second - Op2.second;
812 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Multiply and divide are only checked by assert to have immediate operands.
815 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
816 "Multiply operation with an immediate and a register!");
817 Val = Op1.second * Op2.second;
818 OperandStack.push_back(std::make_pair(IC_IMM, Val));
821 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
822 "Divide operation with an immediate and a register!");
// NOTE(review): division by zero is guarded only by this assert.
823 assert (Op2.second != 0 && "Division by zero!");
824 Val = Op1.second / Op2.second;
825 OperandStack.push_back(std::make_pair(IC_IMM, Val));
830 assert (OperandStack.size() == 1 && "Expected a single result.");
831 return OperandStack.pop_back_val().second;
835 enum IntelBracExprState {
// IntelBracExprStateMachine - Tracks the parse of an Intel bracketed memory
// expression. The token handlers (onRegister, onInteger, onDispExpr and the
// operator handlers) update State, record the base register, index register,
// scale and symbol, and feed operands and operators to the embedded
// calculator IC, which computes the immediate displacement.
// NOTE(review): most of this class is not visible in this excerpt -- the
// headers of several handlers, their switch statements and many assignments
// are missing -- so the comments below describe only what the visible lines
// show.
852 class IntelBracExprStateMachine {
853 IntelBracExprState State;
854 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Starts in IBES_PLUS with no registers, a scale of 1, and Disp seeded from
// the caller-supplied displacement.
860 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
861 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp),
// Result accessors.
864 unsigned getBaseReg() { return BaseReg; }
865 unsigned getIndexReg() { return IndexReg; }
866 unsigned getScale() { return Scale; }
867 const MCExpr *getSym() { return Sym; }
868 StringRef getSymName() { return SymName; }
// getImmDisp - The initial displacement plus the calculator's result.
869 int64_t getImmDisp() { return Disp + IC.execute(); }
// The expression is complete only once the closing bracket has been seen.
870 bool isValidEndState() { return State == IBES_RBRAC; }
// Fragments of the '+' handler (header not visible).
880 IC.pushOperator(IC_PLUS);
884 // If we already have a BaseReg, then assume this is the IndexReg with a
889 assert (!IndexReg && "BaseReg/IndexReg already set!");
893 IC.pushOperator(IC_PLUS);
904 IC.pushOperand(IC_IMM);
// Fragments of the '-' handler (header not visible).
908 IC.pushOperator(IC_MINUS);
912 // If we already have a BaseReg, then assume this is the IndexReg with a
917 assert (!IndexReg && "BaseReg/IndexReg already set!");
921 IC.pushOperator(IC_MINUS);
// onRegister - A register token was seen. In the IBES_INTEGER_STAR case the
// integer pushed before the '*' is popped back out of the calculator and
// becomes the scale.
925 void onRegister(unsigned Reg) {
932 State = IBES_REGISTER;
934 IC.pushOperand(IC_REGISTER);
936 case IBES_INTEGER_STAR:
937 assert (!IndexReg && "IndexReg already set!");
938 State = IBES_INTEGER;
940 Scale = IC.popOperand();
941 IC.pushOperand(IC_IMM);
// onDispExpr - A symbolic displacement was seen; its name is recorded and a
// zero-valued immediate is pushed as its placeholder in the calculator.
946 void onDispExpr(const MCExpr *SymRef, StringRef SymRefName) {
953 State = IBES_INTEGER;
955 SymName = SymRefName;
956 IC.pushOperand(IC_IMM);
// onInteger - An integer token was seen.
960 void onInteger(int64_t TmpInt) {
970 case IBES_INTEGER_STAR:
971 State = IBES_INTEGER;
972 IC.pushOperand(IC_IMM, TmpInt);
974 case IBES_REGISTER_STAR:
975 assert (!IndexReg && "IndexReg already set!");
976 State = IBES_INTEGER;
// Fragments of the '*' handler (header not visible): the next state depends
// on what preceded the '*'.
989 State = IBES_INTEGER_STAR;
990 IC.pushOperator(IC_MULTIPLY);
993 State = IBES_REGISTER_STAR;
994 IC.pushOperator(IC_MULTIPLY);
997 State = IBES_MULTIPLY;
998 IC.pushOperator(IC_MULTIPLY);
// Fragment of the '/' handler (header not visible).
1008 State = IBES_DIVIDE;
1009 IC.pushOperator(IC_DIVIDE);
// Fragments of the bracket handlers (headers not visible).
1020 IC.pushOperator(IC_PLUS);
1035 // If we already have a BaseReg, then assume this is the IndexReg with a
1040 assert (!IndexReg && "BaseReg/IndexReg already set!");
// Fragments of the parenthesis handlers (headers not visible).
1056 case IBES_INTEGER_STAR:
1058 State = IBES_LPAREN;
1059 IC.pushOperator(IC_LPAREN);
1075 State = IBES_RPAREN;
1076 IC.pushOperator(IC_RPAREN);
// CreateMemForInlineAsm - Build the operand for a memory reference found
// while parsing inline assembly.
//
// When Disp is a symbol reference, the symbol is looked up through
// SemaCallback. The visible code then either returns a register operand
// flagged AddressOf (for references that are not variable declarations) or
// falls through to a memory operand, recording a size-directive rewrite on
// the way.
// NOTE(review): the return-type line, the conditions guarding the register
// path and the size-directive rewrite, and several argument lines are not
// visible in this excerpt.
1083 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1084 unsigned BaseReg, unsigned IndexReg,
1085 unsigned Scale, SMLoc Start, SMLoc End,
1086 SMLoc SizeDirLoc, unsigned Size,
1087 StringRef SymName) {
1088 bool NeedSizeDir = false;
1089 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1090 const MCSymbol &Sym = SymRef->getSymbol();
1091 // FIXME: The SemaLookup will fail if the name is anything other than an
1093 // FIXME: Pass a valid SMLoc.
1094 bool IsVarDecl = false;
1095 unsigned tLength, tSize, tType;
1096 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
// The looked-up tType is multiplied by 8 to get a size in bits; a size
// directive is only needed when that size is nonzero.
1099 Size = tType * 8; // Size is in terms of bits in this context.
1100 NeedSizeDir = Size > 0;
1102 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1103 // reference. We need an 'r' constraint here, so we need to create register
1104 // operand to ensure proper matching. Just pick a GPR based on the size of
1107 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1108 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
1114 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
1117 // When parsing inline assembly we set the base register to a non-zero value
1118 // if we don't know the actual value at this time. This is necessary to
1119 // get the matching correct in some cases.
1120 BaseReg = BaseReg ? BaseReg : 1;
1121 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1122 End, Size, SymName);
// ParseIntelBracExpression - Parse an Intel-syntax bracketed memory
// expression starting at '['.
//
// Two shapes are handled: a lone symbol, '[' Symbol ']', and the general
// [ BaseReg + Scale*IndexReg + Disp ] form, which is driven token by token
// through an IntelBracExprStateMachine. For inline assembly the function
// also records AsmRewrites that adjust the assembly string. Errors are
// returned as operands built by ErrorOperand().
// NOTE(review): a number of lines (the rest of the parameter list, loop
// headers, some conditions, returns and closing braces) are not visible in
// this excerpt.
1125 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
1129 const AsmToken &Tok = Parser.getTok();
1130 SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
1132 if (getLexer().isNot(AsmToken::LBrac))
1133 return ErrorOperand(Start, "Expected '[' token!");
1134 Parser.Lex(); // Eat '['
1136 unsigned TmpReg = 0;
// Tok is a reference to the parser's token, so after the Lex() above this
// presumably yields the location of the first token inside the brackets --
// TODO confirm.
1137 SMLoc StartInBrac = Tok.getLoc();
1139 // Try to handle '[' 'Symbol' ']'
1140 if (getLexer().is(AsmToken::Identifier)) {
1141 SMLoc Loc = Tok.getLoc();
// A true return from ParseRegister means the identifier is not a register, so
// it is parsed as an expression instead.
1142 if (ParseRegister(TmpReg, Loc, End)) {
1144 StringRef Identifier = Tok.getString();
1145 if (getParser().parseExpression(Disp, End))
1148 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1151 if (getLexer().isNot(AsmToken::RBrac))
1152 return ErrorOperand(Tok.getLoc(), "Expected ']' token!");
1154 if (isParsingInlineAsm()) {
1155 // Remove the '[' and ']' from the IR string.
1156 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Start, 1));
1157 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Tok.getLoc(), 1));
1159 Parser.Lex(); // Eat ']'
1160 if (!isParsingInlineAsm())
1161 return X86Operand::CreateMem(Disp, Start, End, Size);
1162 return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,
1163 /*IndexReg=*/0, /*Scale*/1, Start, End,
1164 SizeDirLoc, Size, Identifier);
1168 // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
1169 // immediate displacement before the bracketed expression.
1171 IntelBracExprStateMachine SM(Parser, ImmDisp);
1173 // If we parsed a register, then the end loc has already been set and
1174 // the identifier has already been lexed. We also need to update the
1177 SM.onRegister(TmpReg);
// UpdateLocLex is cleared by the branches below that have already consumed
// their tokens, so the generic "consume the token" step is skipped for them.
1180 bool UpdateLocLex = true;
1182 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1183 // identifier. Don't try to parse it as a register.
1184 if (Tok.getString().startswith("."))
1187 switch (getLexer().getKind()) {
1189 if (SM.isValidEndState()) {
1193 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1195 case AsmToken::Identifier: {
1196 // This could be a register or a symbolic displacement.
1198 const MCExpr *Disp = 0;
1199 AsmToken IdentTok = Parser.getTok();
1200 SMLoc IdentLoc = IdentTok.getLoc();
1201 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1202 SM.onRegister(TmpReg);
1203 UpdateLocLex = false;
1205 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1206 SM.onDispExpr(Disp, IdentTok.getString());
1207 UpdateLocLex = false;
1210 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1212 case AsmToken::Integer:
1213 if (isParsingInlineAsm())
1214 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1216 SM.onInteger(Tok.getIntVal());
// Operators and brackets are forwarded to the matching state-machine handler.
1218 case AsmToken::Plus: SM.onPlus(); break;
1219 case AsmToken::Minus: SM.onMinus(); break;
1220 case AsmToken::Star: SM.onStar(); break;
1221 case AsmToken::Slash: SM.onDivide(); break;
1222 case AsmToken::LBrac: SM.onLBrac(); break;
1223 case AsmToken::RBrac: SM.onRBrac(); break;
1224 case AsmToken::LParen: SM.onLParen(); break;
1225 case AsmToken::RParen: SM.onRParen(); break;
1227 if (!Done && UpdateLocLex) {
1229 Parser.Lex(); // Consume the token.
// A symbolic displacement was seen inside the brackets.
1234 if (const MCExpr *Sym = SM.getSym()) {
1237 if (isParsingInlineAsm()) {
1238 // Remove the '[' and ']' from the IR string.
1239 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Start, 1));
1240 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1242 // If ImmDisp is non-zero, then we parsed a displacement before the
1243 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp ])
// NOTE(review): getImmDisp() returns int64_t; the value is stored unsigned
// here.
1244 uint64_t FinalImmDisp = SM.getImmDisp();
1245 if (ImmDisp && ImmDisp != FinalImmDisp) {
1246 // If ImmDisp doesn't match the displacement computed by the state machine
1247 // then we have an additional displacement in the bracketed expression.
1249 } else if (FinalImmDisp) {
1250 // We have a symbolic and an immediate displacement, but no displacement
1251 // before the bracketed expression.
1253 // Put the immediate displacement before the bracketed expression.
1254 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, 0,
1257 // Remove all the ImmPrefix rewrites within the brackets.
1258 for (SmallVectorImpl<AsmRewrite>::iterator
1259 I = InstInfo->AsmRewrites->begin(),
1260 E = InstInfo->AsmRewrites->end(); I != E; ++I) {
1261 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1263 if ((*I).Kind == AOK_ImmPrefix)
1264 (*I).Kind = AOK_Delete;
// The symbol name's data pointer is used as its position in the source text
// to compute how much to skip on either side of it.
1266 StringRef SymName = SM.getSymName();
1267 const char *SymLocPtr = SymName.data();
1268 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already known to be nonzero inside this
// 'if', so the assert below cannot fire; a symbol located before StartInBrac
// would wrap to a large value rather than be caught.
1269 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1270 assert(Len > 0 && "Expected a non-negative length.");
1271 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1273 // Skip everything after the symbol.
// NOTE(review): same observation as above -- this assert is vacuous for an
// unsigned Len.
1274 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1275 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1276 assert(Len > 0 && "Expected a non-negative length.");
1277 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1281 // An immediate displacement only.
1282 Disp = MCConstantExpr::Create(SM.getImmDisp(), getContext());
1285 // Parse the dot operator (e.g., [ebx].foo.bar).
1286 if (Tok.getString().startswith(".")) {
1287 SmallString<64> Err;
1288 const MCExpr *NewDisp;
1289 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1290 return ErrorOperand(Tok.getLoc(), Err);
1292 End = Tok.getEndLoc();
1293 Parser.Lex(); // Eat the field.
// NOTE(review): the state-machine getters return unsigned; the values are
// held in int locals here.
1297 int BaseReg = SM.getBaseReg();
1298 int IndexReg = SM.getIndexReg();
1299 int Scale = SM.getScale();
1301 if (isParsingInlineAsm())
1302 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1303 End, SizeDirLoc, Size, SM.getSymName());
// With neither base nor index register, the operand is displacement-only: an
// absolute memory operand, or a segment-qualified one with scale 1.
// NOTE(review): the condition choosing between the two is not visible here.
1306 if (!BaseReg && !IndexReg) {
1308 return X86Operand::CreateMem(Disp, Start, End, Size);
1310 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1312 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1316 // Inline assembly may use variable names with namespace alias qualifiers.
//
// Extends an identifier that has just been parsed with the "::"-separated
// qualifier tokens that follow it. On the path that reaches the end of the
// function, Identifier is widened in place so that it runs from its original
// start through the end of the token held in IdentEnd, and Disp is replaced by
// a VK_None reference to the symbol with that full name.
//
// Disp        [out] receives the MCSymbolRefExpr for the qualified name.
// Identifier  [in,out] text of the leading identifier; assumed to point into
//             the buffer being lexed, because its new length is computed by
//             subtracting pointers.
// Returns an error operand when a ':' is not followed by a second ':' or when
// "::" is not followed by an identifier.
// NOTE(review): the values returned on the early-exit and success paths are
// not visible in this listing -- presumably null; confirm.
1317 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1318 StringRef &Identifier) {
1319 // We should only see Foo::Bar if we're parsing inline assembly.
1320 if (!isParsingInlineAsm())
1323 // If we don't see a ':' then there can't be a qualifier.
1324 if (getLexer().isNot(AsmToken::Colon))
1328 const AsmToken &Tok = Parser.getTok();
// Copy (not reference) of the token that currently ends the identifier.
1329 AsmToken IdentEnd = Tok;
1331 switch (getLexer().getKind()) {
1335 case AsmToken::Colon:
1336 getLexer().Lex(); // Consume ':'.
1337 if (getLexer().isNot(AsmToken::Colon))
1338 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1339 getLexer().Lex(); // Consume second ':'.
1340 if (getLexer().isNot(AsmToken::Identifier))
1341 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1343 case AsmToken::Identifier:
1345 getLexer().Lex(); // Consume the identifier.
// Stretch Identifier so that it ends where the token in IdentEnd ends, then
// build the displacement from the symbol of that complete name.
1350 unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data();
1351 Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size());
1352 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1353 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1354 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1358 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Size is obtained from getIntelMemOperandSize() on the current token, and a
/// "PTR"/"ptr" token is asserted where one is expected. The operand forms are
/// then tried in this order:
///   1. Integer '[' ... ']'    - immediate displacement before the brackets.
///   2. '[' ... ']'            - bracketed expression only.
///   3. SegReg ':' '[' ... ']' - segment register followed by brackets.
///   4. anything else          - parsed as a general expression that becomes
///                               the displacement.
/// Forms 1-3 are completed by ParseIntelBracExpression.
///
/// SegReg is the segment register already parsed by the caller (callers in
/// this file pass 0 when there is none); in form 3 it is handed to
/// ParseRegister, which returns false on success.
/// Returns the memory operand, or an error operand when an expected '[' or
/// ':' is missing.
/// NOTE(review): in form 4 neither return forwards SegReg (the inline-asm
/// call passes 0 explicitly), so a segment register supplied by the caller
/// appears to be dropped -- confirm this is intended.
1359 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1362 const AsmToken &Tok = Parser.getTok();
1365 unsigned Size = getIntelMemOperandSize(Tok.getString());
1368 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1369 "Unexpected token!");
1373 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1374 if (getLexer().is(AsmToken::Integer)) {
1375 if (isParsingInlineAsm())
1376 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
// NOTE(review): this local appears to shadow the ImmDisp parameter that the
// later ParseIntelBracExpression calls use -- confirm.
1378 uint64_t ImmDisp = Tok.getIntVal();
1379 Parser.Lex(); // Eat the integer.
1380 if (getLexer().isNot(AsmToken::LBrac))
1381 return ErrorOperand(Start, "Expected '[' token!");
1382 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1385 if (getLexer().is(AsmToken::LBrac))
1386 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1388 if (!ParseRegister(SegReg, Start, End)) {
1389 // Handle SegReg : [ ... ]
1390 if (getLexer().isNot(AsmToken::Colon))
1391 return ErrorOperand(Start, "Expected ':' token!");
1392 Parser.Lex(); // Eat :
1393 if (getLexer().isNot(AsmToken::LBrac))
1394 return ErrorOperand(Start, "Expected '[' token!");
1395 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Form 4: no brackets at all; the whole expression is the displacement.
1398 const MCExpr *Disp = 0;
// Remember the first token's text before parseExpression consumes it; it is
// the name handed to the inline-asm helpers below.
1399 StringRef Identifier = Tok.getString();
1400 if (getParser().parseExpression(Disp, End))
1403 if (!isParsingInlineAsm())
1404 return X86Operand::CreateMem(Disp, Start, End, Size);
1406 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1409 return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,/*IndexReg=*/0,
1410 /*Scale=*/1, Start, End, Start, Size,Identifier);
1413 /// Parse the '.' operator.
///
/// Folds the offset named by the current ".field" / ".imm" token into a
/// constant displacement.
///
/// \param Disp     displacement parsed so far; only an MCConstantExpr is
///                 supported.
/// \param NewDisp  [out] receives a new MCConstantExpr whose value is the
///                 original displacement plus the dot-operator offset.
/// \param Err      [out] receives the diagnostic text on failure.
/// \returns a value the caller tests for failure: the caller in this file
///          reports Err as an error operand when the result is true.
1414 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1415 const MCExpr **NewDisp,
1416 SmallString<64> &Err) {
1417 const AsmToken &Tok = Parser.getTok();
1418 uint64_t OrigDispVal, DotDispVal;
1420 // FIXME: Handle non-constant expressions.
1421 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1422 OrigDispVal = OrigDisp->getValue();
1424 Err = "Non-constant offsets are not supported!";
// Drop the leading '.' to leave just the field name or number.
1429 StringRef DotDispStr = Tok.getString().drop_front(1);
1431 // .Imm gets lexed as a real.
1432 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here, so a
// malformed number would go unnoticed -- confirm whether the lexer already
// guarantees a pure decimal string.
1434 DotDispStr.getAsInteger(10, DotDisp);
1435 DotDispVal = DotDisp.getZExtValue();
1436 } else if (Tok.is(AsmToken::Identifier)) {
1437 // We should only see an identifier when parsing the original inline asm.
1438 // The front-end should rewrite this in terms of immediates.
1439 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split "Base.Member" at the first '.' and ask the front-end for the offset.
1442 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1443 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1445 Err = "Unable to lookup field reference!";
1448 DotDispVal = DotDisp;
1450 Err = "Unexpected token type!";
// For inline asm with a named field, record a rewrite that covers the field
// text (without the '.') and carries the combined offset.
1454 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1455 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1456 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned here.
1457 unsigned Val = OrigDispVal + DotDispVal;
1458 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1462 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1466 /// Parse the 'offset' operator. This operator is used to specify the
1467 /// location rather than the content of a variable.
///
/// Consumes the operator token and the primary expression after it, records
/// a rewrite that skips the operator text, and returns a register operand
/// (RBX in 64-bit mode, EBX otherwise) created with GetAddress set and
/// carrying the identifier. Returns an error operand if the expression cannot
/// be parsed.
1468 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1469 const AsmToken &Tok = Parser.getTok();
1470 SMLoc OffsetOfLoc = Tok.getLoc();
1471 Parser.Lex(); // Eat offset.
1472 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
1475 SMLoc Start = Tok.getLoc(), End;
1476 StringRef Identifier = Tok.getString();
1477 if (getParser().parsePrimaryExpr(Val, End))
1478 return ErrorOperand(Start, "Unable to parse expression!");
// Pick up a trailing "::name" qualifier; this may widen Identifier.
1480 const MCExpr *Disp = 0;
1481 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1484 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one separator
// character -- confirm it holds for other spacing between the two tokens.
1485 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1487 // The offset operator will have an 'r' constraint, thus we need to create
1488 // register operand to ensure proper matching. Just pick a GPR based on
1489 // the size of a pointer.
1490 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1491 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1492 OffsetOfLoc, Identifier);
// Kind selector for ParseIntelOperator. The values used with it in this file
// are IOK_LENGTH, IOK_SIZE and IOK_TYPE (presumably the enumerators declared
// here -- confirm).
1495 enum IntelOperatorKind {
1501 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1502 /// returns the number of elements in an array. It returns the value 1 for
1503 /// non-array variables. The SIZE operator returns the size of a C or C++
1504 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1505 /// TYPE operator returns the size of a C or C++ type or variable. If the
1506 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///        reaches llvm_unreachable.
/// \returns an immediate operand holding the selected value, or an error
///          operand if the expression cannot be parsed or the lookup fails.
///          The operator and its argument are also recorded as an AOK_Imm
///          rewrite.
1507 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1508 const AsmToken &Tok = Parser.getTok();
1509 SMLoc TypeLoc = Tok.getLoc();
1510 Parser.Lex(); // Eat operator.
1511 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Keep a copy of the first token so the qualified-name case can be detected
// after parsing (see the comparison with Identifier below).
1514 AsmToken StartTok = Tok;
1515 SMLoc Start = Tok.getLoc(), End;
1516 StringRef Identifier = Tok.getString();
1517 if (getParser().parsePrimaryExpr(Val, End))
1518 return ErrorOperand(Start, "Unable to parse expression!");
// NOTE(review): Disp is filled in by the call below but no later use of it is
// visible in this function; only the widened Identifier matters here.
1520 const MCExpr *Disp = 0;
1521 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1524 unsigned Length = 0, Size = 0, Type = 0;
1525 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1526 const MCSymbol &Sym = SymRef->getSymbol();
1527 // FIXME: The SemaLookup will fail if the name is anything other than an
1529 // FIXME: Pass a valid SMLoc.
1531 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1532 Size, Type, IsVarDecl))
1533 // FIXME: We don't warn on variables with namespace alias qualifiers
1534 // because support still needs to be added in the frontend.
// Identifier still equals the first token only when no qualifier was appended.
1535 if (Identifier.equals(StartTok.getString()))
1536 return ErrorOperand(Start, "Unable to lookup expr!");
1540 default: llvm_unreachable("Unexpected operand kind!");
1541 case IOK_LENGTH: CVal = Length; break;
1542 case IOK_SIZE: CVal = Size; break;
1543 case IOK_TYPE: CVal = Type; break;
1546 // Rewrite the type operator and the C or C++ type or variable in terms of an
1547 // immediate. E.g. TYPE foo -> $$4
1548 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1549 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1551 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1552 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// The alternatives are tried in this order:
///   1. (inline asm only) the offset / length / size / type operators, matched
///      in all-lowercase or all-uppercase spelling;
///   2. an expression starting with an integer, a real or '-': an immediate
///      unless it is followed by '[', in which case it is the displacement in
///      front of a bracketed memory expression;
///   3. a register, which becomes a segment override when followed by ':';
///   4. otherwise a memory operand.
1555 X86Operand *X86AsmParser::ParseIntelOperand() {
1556 const AsmToken &Tok = Parser.getTok();
1557 SMLoc Start = Tok.getLoc(), End;
1558 StringRef AsmTokStr = Tok.getString();
1560 // Offset, length, type and size operators.
1561 if (isParsingInlineAsm()) {
1562 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1563 return ParseIntelOffsetOfOperator();
1564 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1565 return ParseIntelOperator(IOK_LENGTH);
1566 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1567 return ParseIntelOperator(IOK_SIZE);
1568 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1569 return ParseIntelOperator(IOK_TYPE);
1573 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1574 getLexer().is(AsmToken::Minus)) {
// Record whether the expression started with a plain integer before the
// lexer advances past it.
1576 bool isInteger = getLexer().is(AsmToken::Integer);
1577 if (!getParser().parseExpression(Val, End)) {
1578 if (isParsingInlineAsm())
1579 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1581 if (getLexer().isNot(AsmToken::LBrac))
1582 return X86Operand::CreateImm(Val, Start, End);
1584 // Only positive immediates are valid.
1586 Error(Tok.getLoc(), "expected a positive immediate "
1587 "displacement before bracketed expr.");
1591 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): dyn_cast<> yields null when Val is not an MCConstantExpr and
// the result is dereferenced without a check -- confirm that Val is always a
// constant here, or guard the cast.
// A zero displacement does not take this return and falls through below.
1592 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1593 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
1599 if (!ParseRegister(RegNo, Start, End)) {
1600 // If this is a segment register followed by a ':', then this is the start
1601 // of a memory reference, otherwise this is a normal register reference.
1602 if (getLexer().isNot(AsmToken::Colon))
1603 return X86Operand::CreateReg(RegNo, Start, End);
1605 getParser().Lex(); // Eat the colon.
1606 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
1610 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
/// Parse a single operand in AT&T syntax, dispatching on the current token:
///   '%'  - a register; when followed by ':' it is passed to ParseMemOperand
///          as the segment register of a memory operand. %eiz and %riz are
///          rejected here because they are index-only registers.
///   '$'  - an immediate expression.
///   the remaining case parses a memory operand with no segment register.
/// Returns null when register parsing fails.
1613 X86Operand *X86AsmParser::ParseATTOperand() {
1614 switch (getLexer().getKind()) {
1616 // Parse a memory operand with no segment register.
1617 return ParseMemOperand(0, Parser.getTok().getLoc());
1618 case AsmToken::Percent: {
1619 // Read the register.
1622 if (ParseRegister(RegNo, Start, End)) return 0;
1623 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1624 Error(Start, "%eiz and %riz can only be used as index registers",
1625 SMRange(Start, End));
1629 // If this is a segment register followed by a ':', then this is the start
1630 // of a memory reference, otherwise this is a normal register reference.
1631 if (getLexer().isNot(AsmToken::Colon))
1632 return X86Operand::CreateReg(RegNo, Start, End);
1634 getParser().Lex(); // Eat the colon.
1635 return ParseMemOperand(RegNo, Start);
1637 case AsmToken::Dollar: {
1638 // $42 -> immediate.
1639 SMLoc Start = Parser.getTok().getLoc(), End;
1642 if (getParser().parseExpression(Val, End))
1644 return X86Operand::CreateImm(Val, Start, End);
1649 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1650 /// has already been parsed if present.
///
/// \param SegReg   segment register parsed by the caller (callers in this file
///                 pass 0 when there is none).
/// \param MemStart location used as the start of the resulting operand.
/// \returns the memory operand; null when a register or the displacement
///          expression fails to parse.
///
/// The displacement defaults to the constant 0. The scale defaults to 1 and,
/// when given, must be 1, 2, 4 or 8. A scale with no index register is parsed
/// and then ignored with a warning.
1651 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1653 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1654 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1655 // only way to do this without lookahead is to eat the '(' and see what is
1657 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1658 if (getLexer().isNot(AsmToken::LParen)) {
1660 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1662 // After parsing the base expression we could either have a parenthesized
1663 // memory address or not. If not, return now. If so, eat the (.
1664 if (getLexer().isNot(AsmToken::LParen)) {
1665 // Unless we have a segment register, treat this as an immediate.
1667 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1668 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1674 // Okay, we have a '('. We don't know if this is an expression or not,
1675 // so we have to eat the ( to see beyond it.
1676 SMLoc LParenLoc = Parser.getTok().getLoc();
1677 Parser.Lex(); // Eat the '('.
1679 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1680 // Nothing to do here, fall into the code below with the '(' part of the
1681 // memory operand consumed.
1685 // It must be a parenthesized expression, parse it now.
1686 if (getParser().parseParenExpression(Disp, ExprEnd))
1689 // After parsing the base expression we could either have a parenthesized
1690 // memory address or not. If not, return now. If so, eat the (.
1691 if (getLexer().isNot(AsmToken::LParen)) {
1692 // Unless we have a segment register, treat this as an immediate.
1694 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1695 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1703 // If we reached here, then we just ate the ( of the memory operand. Process
1704 // the rest of the memory operand.
1705 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1708 if (getLexer().is(AsmToken::Percent)) {
1709 SMLoc StartLoc, EndLoc;
1710 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1711 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1712 Error(StartLoc, "eiz and riz can only be used as index registers",
1713 SMRange(StartLoc, EndLoc));
1718 if (getLexer().is(AsmToken::Comma)) {
1719 Parser.Lex(); // Eat the comma.
1720 IndexLoc = Parser.getTok().getLoc();
1722 // Following the comma we should have either an index register, or a scale
1723 // value. We don't support the latter form, but we want to parse it
1726 // Note that even though it would be completely consistent to support syntax
1727 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1728 if (getLexer().is(AsmToken::Percent)) {
1730 if (ParseRegister(IndexReg, L, L)) return 0;
1732 if (getLexer().isNot(AsmToken::RParen)) {
1733 // Parse the scale amount:
1734 // ::= ',' [scale-expression]
1735 if (getLexer().isNot(AsmToken::Comma)) {
1736 Error(Parser.getTok().getLoc(),
1737 "expected comma in scale expression");
1740 Parser.Lex(); // Eat the comma.
1742 if (getLexer().isNot(AsmToken::RParen)) {
1743 SMLoc Loc = Parser.getTok().getLoc();
1746 if (getParser().parseAbsoluteExpression(ScaleVal)){
1747 Error(Loc, "expected scale expression");
1751 // Validate the scale amount.
1752 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1753 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1756 Scale = (unsigned)ScaleVal;
1759 } else if (getLexer().isNot(AsmToken::RParen)) {
1760 // A scale amount without an index is ignored.
1762 SMLoc Loc = Parser.getTok().getLoc();
1765 if (getParser().parseAbsoluteExpression(Value))
1769 Warning(Loc, "scale factor without index register is ignored");
1774 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1775 if (getLexer().isNot(AsmToken::RParen)) {
1776 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1779 SMLoc MemEnd = Parser.getTok().getEndLoc();
1780 Parser.Lex(); // Eat the ')'.
1782 // If we have both a base register and an index register make sure they are
1783 // both 64-bit or 32-bit registers.
1784 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// NOTE(review): both checks below also fire for a 16-bit index register, but
// the messages then describe the index as 32-bit / 64-bit respectively.
1785 if (BaseReg != 0 && IndexReg != 0) {
1786 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1787 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1788 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1789 IndexReg != X86::RIZ) {
1790 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1794 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1795 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1796 IndexReg != X86::EIZ){
1797 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1802 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parses the operands of the statement whose mnemonic is Name and fills
// Operands: first the (possibly patched) mnemonic token, then one entry per
// parsed operand. Afterwards a series of mnemonic-specific rewrites is applied
// to Operands (in/out with (%dx), string instructions with explicit operands,
// shifts/rotates by 1, "int $3").
//
// Name      mnemonic as written in the source.
// NameLoc   location of the mnemonic; also used for the synthesized
//           comparison-code immediate.
// Operands  [out] receives the mnemonic token and the parsed operands.
// Returns the result of Error() when an unexpected token follows the operand
// list. NOTE(review): the remaining return statements are not visible in this
// listing.
1807 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1808 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1810 StringRef PatchedName = Name;
1812 // FIXME: Hack to recognize setneb as setne.
1813 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1814 PatchedName != "setb" && PatchedName != "setnb")
1815 PatchedName = PatchedName.substr(0, Name.size()-1);
1817 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The text between the "cmp"/"vcmp" prefix and the two-letter type suffix is
// looked up as a comparison code; on a hit the mnemonic is reduced to the
// plain form and the code is kept in ExtraImmOp as an extra immediate.
1818 const MCExpr *ExtraImmOp = 0;
1819 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1820 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1821 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1822 bool IsVCMP = PatchedName[0] == 'v';
1823 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1824 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1825 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1829 .Case("unord", 0x03)
1834 /* AVX only from here */
1835 .Case("eq_uq", 0x08)
1838 .Case("false", 0x0B)
1839 .Case("neq_oq", 0x0C)
1843 .Case("eq_os", 0x10)
1844 .Case("lt_oq", 0x11)
1845 .Case("le_oq", 0x12)
1846 .Case("unord_s", 0x13)
1847 .Case("neq_us", 0x14)
1848 .Case("nlt_uq", 0x15)
1849 .Case("nle_uq", 0x16)
1850 .Case("ord_s", 0x17)
1851 .Case("eq_us", 0x18)
1852 .Case("nge_uq", 0x19)
1853 .Case("ngt_uq", 0x1A)
1854 .Case("false_os", 0x1B)
1855 .Case("neq_os", 0x1C)
1856 .Case("ge_oq", 0x1D)
1857 .Case("gt_oq", 0x1E)
1858 .Case("true_us", 0x1F)
// ~0U is treated as "no match"; codes of 8 and above are accepted only for
// the v-prefixed mnemonics.
1860 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1861 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1862 getParser().getContext());
1863 if (PatchedName.endswith("ss")) {
1864 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1865 } else if (PatchedName.endswith("sd")) {
1866 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1867 } else if (PatchedName.endswith("ps")) {
1868 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1870 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1871 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1876 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// When not parsing Intel syntax the comparison-code immediate goes directly
// after the mnemonic; for Intel syntax it is appended after the parsed
// operands (see further down).
1878 if (ExtraImmOp && !isParsingIntelSyntax())
1879 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1881 // Determine whether this is an instruction prefix.
1883 Name == "lock" || Name == "rep" ||
1884 Name == "repe" || Name == "repz" ||
1885 Name == "repne" || Name == "repnz" ||
1886 Name == "rex64" || Name == "data16";
1889 // This does the actual operand parsing. Don't parse any more if we have a
1890 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1891 // just want to parse the "lock" as the first instruction and the "incl" as
1893 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1895 // Parse '*' modifier.
1896 if (getLexer().is(AsmToken::Star)) {
1897 SMLoc Loc = Parser.getTok().getLoc();
1898 Operands.push_back(X86Operand::CreateToken("*", Loc));
1899 Parser.Lex(); // Eat the star.
1902 // Read the first operand.
1903 if (X86Operand *Op = ParseOperand())
1904 Operands.push_back(Op);
1906 Parser.eatToEndOfStatement();
1910 while (getLexer().is(AsmToken::Comma)) {
1911 Parser.Lex(); // Eat the comma.
1913 // Parse and remember the operand.
1914 if (X86Operand *Op = ParseOperand())
1915 Operands.push_back(Op);
1917 Parser.eatToEndOfStatement();
1922 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1923 SMLoc Loc = getLexer().getLoc();
1924 Parser.eatToEndOfStatement();
1925 return Error(Loc, "unexpected token in argument list");
1929 if (getLexer().is(AsmToken::EndOfStatement))
1930 Parser.Lex(); // Consume the EndOfStatement
1931 else if (isPrefix && getLexer().is(AsmToken::Slash))
1932 Parser.Lex(); // Consume the prefix separator Slash
1934 if (ExtraImmOp && isParsingIntelSyntax())
1935 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1937 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1938 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1939 // documented form in various unofficial manuals, so a lot of code uses it.
1940 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1941 Operands.size() == 3) {
1942 X86Operand &Op = *(X86Operand*)Operands.back();
1943 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1944 isa<MCConstantExpr>(Op.Mem.Disp) &&
1945 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1946 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1947 SMLoc Loc = Op.getEndLoc();
1948 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1952 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1953 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1954 Operands.size() == 3) {
1955 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1956 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1957 isa<MCConstantExpr>(Op.Mem.Disp) &&
1958 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1959 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1960 SMLoc Loc = Op.getEndLoc();
1961 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1965 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
1966 if (Name.startswith("ins") && Operands.size() == 3 &&
1967 (Name == "insb" || Name == "insw" || Name == "insl")) {
1968 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1969 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1970 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1971 Operands.pop_back();
1972 Operands.pop_back();
1978 // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
1979 if (Name.startswith("outs") && Operands.size() == 3 &&
1980 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1981 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1982 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1983 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1984 Operands.pop_back();
1985 Operands.pop_back();
1991 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
1992 if (Name.startswith("movs") && Operands.size() == 3 &&
1993 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1994 (is64BitMode() && Name == "movsq"))) {
1995 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1996 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1997 if (isSrcOp(Op) && isDstOp(Op2)) {
1998 Operands.pop_back();
1999 Operands.pop_back();
2004 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
// The accumulator register must agree with the mnemonic's size suffix; the
// unsuffixed "lods" accepts any of the four registers.
2005 if (Name.startswith("lods") && Operands.size() == 3 &&
2006 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2007 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
2008 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2009 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2010 if (isSrcOp(*Op1) && Op2->isReg()) {
2012 unsigned reg = Op2->getReg();
2013 bool isLods = Name == "lods";
2014 if (reg == X86::AL && (isLods || Name == "lodsb"))
2016 else if (reg == X86::AX && (isLods || Name == "lodsw"))
2018 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
2020 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
2025 Operands.pop_back();
2026 Operands.pop_back();
2030 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2034 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
// Same register/suffix agreement rule as for lods above.
2035 if (Name.startswith("stos") && Operands.size() == 3 &&
2036 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2037 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
2038 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2039 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2040 if (isDstOp(*Op2) && Op1->isReg()) {
2042 unsigned reg = Op1->getReg();
2043 bool isStos = Name == "stos";
2044 if (reg == X86::AL && (isStos || Name == "stosb"))
2046 else if (reg == X86::AX && (isStos || Name == "stosw"))
2048 else if (reg == X86::EAX && (isStos || Name == "stosl"))
2050 else if (reg == X86::RAX && (isStos || Name == "stosq"))
2055 Operands.pop_back();
2056 Operands.pop_back();
2060 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2065 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// The constant-1 count is the last operand in Intel syntax and the first
// operand after the mnemonic otherwise; it is removed from Operands.
2067 if ((Name.startswith("shr") || Name.startswith("sar") ||
2068 Name.startswith("shl") || Name.startswith("sal") ||
2069 Name.startswith("rcl") || Name.startswith("rcr") ||
2070 Name.startswith("rol") || Name.startswith("ror")) &&
2071 Operands.size() == 3) {
2072 if (isParsingIntelSyntax()) {
2074 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2075 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2076 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2078 Operands.pop_back();
2081 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2082 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2083 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2085 Operands.erase(Operands.begin() + 1);
2090 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2091 // instalias with an immediate operand yet.
2092 if (Name == "int" && Operands.size() == 2) {
2093 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2094 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2095 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2097 Operands.erase(Operands.begin() + 1);
2098 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// Build a replacement instruction in TmpInst with opcode \p Opcode whose
/// operands are the register \p Reg followed by operand 0 of \p Inst (the
/// immediate checked by the callers).
/// NOTE(review): the final parameter (callers pass isCmp), the statement that
/// guards the first register operand, the store back into Inst and the return
/// value are not visible in this listing -- presumably compares omit the
/// destination register and the function returns true; confirm.
2105 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2108 TmpInst.setOpcode(Opcode);
2110 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2111 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2112 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit accumulator/immediate instruction to the form given by
/// \p Opcode, using AX as the register, via convertToSExti8.
/// The conversion is guarded by operand 0 being an immediate that satisfies
/// isImmSExti16i8Value(). NOTE(review): the statement executed when the guard
/// fails is not visible here -- presumably "return false"; confirm.
/// \param isCmp forwarded unchanged to convertToSExti8.
2117 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2118 bool isCmp = false) {
2119 if (!Inst.getOperand(0).isImm() ||
2120 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2123 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit accumulator/immediate instruction to the form given by
/// \p Opcode, using EAX as the register, via convertToSExti8.
/// The conversion is guarded by operand 0 being an immediate that satisfies
/// isImmSExti32i8Value(). NOTE(review): the statement executed when the guard
/// fails is not visible here -- presumably "return false"; confirm.
/// \param isCmp forwarded unchanged to convertToSExti8.
2126 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2127 bool isCmp = false) {
2128 if (!Inst.getOperand(0).isImm() ||
2129 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2132 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Convert a 64-bit accumulator/immediate instruction to the form given by
/// \p Opcode, using RAX as the register, via convertToSExti8.
/// The conversion is guarded by operand 0 being an immediate that satisfies
/// isImmSExti64i8Value(). NOTE(review): the statement executed when the guard
/// fails is not visible here -- presumably "return false"; confirm.
/// \param isCmp forwarded unchanged to convertToSExti8.
2135 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2136 bool isCmp = false) {
2137 if (!Inst.getOperand(0).isImm() ||
2138 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2141 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match rewrite of a single instruction. For the accumulator/immediate
// opcodes of AND, XOR, OR, CMP, ADD, SUB, ADC and SBB (16-, 32- and 64-bit)
// this delegates to the matching convert*to*ri8 helper with the corresponding
// *ri8 opcode; the CMP cases pass isCmp = true. Every other opcode returns
// false.
// The result is used by the caller as its "keep looping" condition, so
// presumably true means Inst was changed -- confirm.
// Ops is not referenced by the cases visible here.
2145 processInstruction(MCInst &Inst,
2146 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2147 switch (Inst.getOpcode()) {
2148 default: return false;
2149 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2150 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2151 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2152 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2153 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2154 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2155 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2156 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2157 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2158 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2159 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2160 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2161 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2162 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2163 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2164 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2165 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2166 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2167 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2168 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2169 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2170 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2171 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2172 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2176 static const char *getSubtargetFeatureName(unsigned Val);
2178 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2179 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2180 MCStreamer &Out, unsigned &ErrorInfo,
2181 bool MatchingInlineAsm) {
2182 assert(!Operands.empty() && "Unexpect empty operand list!");
2183 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2184 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2185 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2187 // First, handle aliases that expand to multiple instructions.
2188 // FIXME: This should be replaced with a real .td file alias mechanism.
2189 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2191 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2192 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2193 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2194 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2196 Inst.setOpcode(X86::WAIT);
2198 if (!MatchingInlineAsm)
2199 Out.EmitInstruction(Inst);
2202 StringSwitch<const char*>(Op->getToken())
2203 .Case("finit", "fninit")
2204 .Case("fsave", "fnsave")
2205 .Case("fstcw", "fnstcw")
2206 .Case("fstcww", "fnstcw")
2207 .Case("fstenv", "fnstenv")
2208 .Case("fstsw", "fnstsw")
2209 .Case("fstsww", "fnstsw")
2210 .Case("fclex", "fnclex")
2212 assert(Repl && "Unknown wait-prefixed instruction");
2214 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2217 bool WasOriginallyInvalidOperand = false;
2220 // First, try a direct match.
2221 switch (MatchInstructionImpl(Operands, Inst,
2222 ErrorInfo, MatchingInlineAsm,
2223 isParsingIntelSyntax())) {
2226 // Some instructions need post-processing to, for example, tweak which
2227 // encoding is selected. Loop on it while changes happen so the
2228 // individual transformations can chain off each other.
2229 if (!MatchingInlineAsm)
2230 while (processInstruction(Inst, Operands))
2234 if (!MatchingInlineAsm)
2235 Out.EmitInstruction(Inst);
2236 Opcode = Inst.getOpcode();
2238 case Match_MissingFeature: {
2239 assert(ErrorInfo && "Unknown missing feature!");
2240 // Special case the error message for the very common case where only
2241 // a single subtarget feature is missing.
2242 std::string Msg = "instruction requires:";
2244 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2245 if (ErrorInfo & Mask) {
2247 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2251 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2253 case Match_InvalidOperand:
2254 WasOriginallyInvalidOperand = true;
2256 case Match_MnemonicFail:
2260 // FIXME: Ideally, we would only attempt suffix matches for things which are
2261 // valid prefixes, and we could just infer the right unambiguous
2262 // type. However, that requires substantially more matcher support than the
2265 // Change the operand to point to a temporary token.
2266 StringRef Base = Op->getToken();
2267 SmallString<16> Tmp;
2270 Op->setTokenValue(Tmp.str());
2272 // If this instruction starts with an 'f', then it is a floating point stack
2273 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2274 // 80-bit floating point, which use the suffixes s,l,t respectively.
2276 // Otherwise, we assume that this may be an integer instruction, which comes
2277 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2278 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2280 // Check for the various suffix matches.
2281 Tmp[Base.size()] = Suffixes[0];
2282 unsigned ErrorInfoIgnore;
2283 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2284 unsigned Match1, Match2, Match3, Match4;
2286 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2287 isParsingIntelSyntax());
2288 // If this returned as a missing feature failure, remember that.
2289 if (Match1 == Match_MissingFeature)
2290 ErrorInfoMissingFeature = ErrorInfoIgnore;
2291 Tmp[Base.size()] = Suffixes[1];
2292 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2293 isParsingIntelSyntax());
2294 // If this returned as a missing feature failure, remember that.
2295 if (Match2 == Match_MissingFeature)
2296 ErrorInfoMissingFeature = ErrorInfoIgnore;
2297 Tmp[Base.size()] = Suffixes[2];
2298 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2299 isParsingIntelSyntax());
2300 // If this returned as a missing feature failure, remember that.
2301 if (Match3 == Match_MissingFeature)
2302 ErrorInfoMissingFeature = ErrorInfoIgnore;
2303 Tmp[Base.size()] = Suffixes[3];
2304 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2305 isParsingIntelSyntax());
2306 // If this returned as a missing feature failure, remember that.
2307 if (Match4 == Match_MissingFeature)
2308 ErrorInfoMissingFeature = ErrorInfoIgnore;
2310 // Restore the old token.
2311 Op->setTokenValue(Base);
2313 // If exactly one matched, then we treat that as a successful match (and the
2314 // instruction will already have been filled in correctly, since the failing
2315 // matches won't have modified it).
2316 unsigned NumSuccessfulMatches =
2317 (Match1 == Match_Success) + (Match2 == Match_Success) +
2318 (Match3 == Match_Success) + (Match4 == Match_Success);
2319 if (NumSuccessfulMatches == 1) {
2321 if (!MatchingInlineAsm)
2322 Out.EmitInstruction(Inst);
2323 Opcode = Inst.getOpcode();
2327 // Otherwise, the match failed, try to produce a decent error message.
2329 // If we had multiple suffix matches, then identify this as an ambiguous
2331 if (NumSuccessfulMatches > 1) {
2333 unsigned NumMatches = 0;
2334 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2335 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2336 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2337 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2339 SmallString<126> Msg;
2340 raw_svector_ostream OS(Msg);
2341 OS << "ambiguous instructions require an explicit suffix (could be ";
2342 for (unsigned i = 0; i != NumMatches; ++i) {
2345 if (i + 1 == NumMatches)
2347 OS << "'" << Base << MatchChars[i] << "'";
2350 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2354 // Okay, we know that none of the variants matched successfully.
2356 // If all of the instructions reported an invalid mnemonic, then the original
2357 // mnemonic was invalid.
2358 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2359 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2360 if (!WasOriginallyInvalidOperand) {
2361 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2363 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2364 Ranges, MatchingInlineAsm);
2367 // Recover location info for the operand if we know which was the problem.
2368 if (ErrorInfo != ~0U) {
2369 if (ErrorInfo >= Operands.size())
2370 return Error(IDLoc, "too few operands for instruction",
2371 EmptyRanges, MatchingInlineAsm);
2373 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2374 if (Operand->getStartLoc().isValid()) {
2375 SMRange OperandRange = Operand->getLocRange();
2376 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2377 OperandRange, MatchingInlineAsm);
2381 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2385 // If one instruction matched with a missing feature, report this as a
2387 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2388 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2389 std::string Msg = "instruction requires:";
2391 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2392 if (ErrorInfoMissingFeature & Mask) {
2394 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2398 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2401 // If one instruction matched with an invalid operand, report this as an
2403 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2404 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2405 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2410 // If all of these were an outright failure, report it in a useless way.
2411 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2412 EmptyRanges, MatchingInlineAsm);
// Handles the X86-specific assembler directives by dispatching on the
// directive's identifier text:
//   ".word"                   -> ParseDirectiveWord with a size of 2
//   prefix ".code"            -> ParseDirectiveCode (passed the full name)
//   prefix ".att_syntax"      -> assembler dialect set to 0
//   prefix ".intel_syntax"    -> assembler dialect set to 1, after which the
//                                rest of the statement is inspected
// The directive's source location is forwarded to the helpers.
// NOTE(review): the return statements of the two dialect branches are not
// among the lines visible here; presumably "true" signals an error / an
// unhandled directive, matching what Error() returns -- confirm.
2417 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2418 StringRef IDVal = DirectiveID.getIdentifier();
2419 if (IDVal == ".word")
2420 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Prefix match only: the exact ".code32"/".code64" comparison is done by
// ParseDirectiveCode.
2421 else if (IDVal.startswith(".code"))
2422 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2423 else if (IDVal.startswith(".att_syntax")) {
2424 getParser().setAssemblerDialect(0);
2426 } else if (IDVal.startswith(".intel_syntax")) {
2427 getParser().setAssemblerDialect(1);
// Something follows ".intel_syntax" on the same statement; the current
// token is compared against the literal "noprefix".
2428 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2429 if(Parser.getTok().getString() == "noprefix") {
2430 // FIXME : Handle noprefix
2440 /// ParseDirectiveWord
2441 /// ::= .word [ expression (, expression)* ]
///
/// Parses the operand expressions of the directive and hands each one to the
/// streamer.
/// \param Size  passed unchanged to EmitValue as the size of every emitted
///              value (ParseDirective calls this with 2 for ".word").
/// \param L     location used for the "unexpected token" diagnostic.
/// \returns the result of Error() when a token other than a comma or the end
///          of the statement follows an expression. NOTE(review): the other
///          return statements are not among the lines visible here.
2442 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// Nothing is parsed or emitted when the directive has no operands.
2443 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2445 const MCExpr *Value;
// parseExpression fills in Value; its result is tested as a failure flag.
2446 if (getParser().parseExpression(Value))
2449 getParser().getStreamer().EmitValue(Value, Size);
// An end-of-statement token after an expression terminates the list.
2451 if (getLexer().is(AsmToken::EndOfStatement))
2454 // FIXME: Improve diagnostic.
// Expressions must be separated by commas; anything else is reported at L.
2455 if (getLexer().isNot(AsmToken::Comma))
2456 return Error(L, "unexpected token in directive");
2465 /// ParseDirectiveCode
2466 /// ::= .code32 | .code64
///
/// Handles the code-size directives.
/// \param IDVal  full directive name; only the exact spellings ".code32" and
///               ".code64" are accepted.
/// \param L      location used for the "unexpected directive" diagnostic.
/// \returns the result of Error() for any other name. NOTE(review): the
///          remaining return statements are not among the lines visible here.
2467 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2468 if (IDVal == ".code32") {
// Action is taken only when the parser is currently in 64-bit mode; the
// streamer is then told about the change through the MCAF_Code32 flag.
2470 if (is64BitMode()) {
2472 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2474 } else if (IDVal == ".code64") {
// Mirror case: action is taken only when not already in 64-bit mode.
2476 if (!is64BitMode()) {
2478 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Any other directive that reached this function is rejected; its name is
// appended to the diagnostic text.
2481 return Error(L, "unexpected directive " + IDVal);
2487 // Force static initialization.
// Entry point with C linkage. Constructs one RegisterMCAsmParser object for
// TheX86_32Target and one for TheX86_64Target, both with X86AsmParser as the
// parser type, so the same class serves both targets.
2488 extern "C" void LLVMInitializeX86AsmParser() {
2489 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2490 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2493 #define GET_REGISTER_MATCHER
2494 #define GET_MATCHER_IMPLEMENTATION
2495 #define GET_SUBTARGET_FEATURE_NAME
2496 #include "X86GenAsmMatcher.inc"