1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
36 class X86AsmParser : public MCTargetAsmParser {
39 ParseInstructionInfo *InstInfo;
41 MCAsmParser &getParser() const { return Parser; }
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc SizeDirLoc,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
70 X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
71 SMLoc SizeDirLoc, unsigned Size,
74 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
75 SmallString<64> &Err);
77 bool ParseDirectiveWord(unsigned Size, SMLoc L);
78 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
80 bool processInstruction(MCInst &Inst,
81 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
83 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
84 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
85 MCStreamer &Out, unsigned &ErrorInfo,
86 bool MatchingInlineAsm);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
/// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
90 bool isSrcOp(X86Operand &Op);
92 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
93 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
94 bool isDstOp(X86Operand &Op);
96 bool is64BitMode() const {
97 // FIXME: Can tablegen auto-generate this?
98 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
101 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
102 setAvailableFeatures(FB);
105 /// @name Auto-generated Matcher Functions
108 #define GET_ASSEMBLER_HEADER
109 #include "X86GenAsmMatcher.inc"
114 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
115 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
117 // Initialize the set of available features.
118 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
120 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
122 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
124 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
126 virtual bool ParseDirective(AsmToken DirectiveID);
128 bool isParsingIntelSyntax() {
129 return getParser().getAssemblerDialect();
132 } // end anonymous namespace
134 /// @name Auto-generated Match Functions
137 static unsigned MatchRegisterName(StringRef Name);
/// Returns true if \p Value is representable as an 8-bit signed immediate
/// that has been sign-extended to 16 bits. Accepted encodings are the
/// non-negative i8 range, and the negative i8 range as it appears after
/// sign extension to either 16 or 64 bits.
static bool isImmSExti16i8Value(uint64_t Value) {
  // 0 .. 127.
  if (Value <= 0x000000000000007FULL)
    return true;
  // -128 .. -1 sign-extended to 16 bits.
  if (Value >= 0x000000000000FF80ULL && Value <= 0x000000000000FFFFULL)
    return true;
  // -128 .. -1 sign-extended to 64 bits (upper bound is UINT64_MAX).
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// Returns true if \p Value is representable as an 8-bit signed immediate
/// that has been sign-extended to 32 bits. Accepted encodings are the
/// non-negative i8 range, and the negative i8 range as it appears after
/// sign extension to either 32 or 64 bits.
static bool isImmSExti32i8Value(uint64_t Value) {
  // 0 .. 127.
  if (Value <= 0x000000000000007FULL)
    return true;
  // -128 .. -1 sign-extended to 32 bits.
  if (Value >= 0x00000000FFFFFF80ULL && Value <= 0x00000000FFFFFFFFULL)
    return true;
  // -128 .. -1 sign-extended to 64 bits (upper bound is UINT64_MAX).
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// Returns true if \p Value fits in an unsigned 8-bit immediate, i.e. it is
/// no larger than 0xFF.
static bool isImmZExtu32u8Value(uint64_t Value) {
  const uint64_t MaxU8 = 0x00000000000000FFULL;
  return !(Value > MaxU8);
}
/// Returns true if \p Value is representable as an 8-bit signed immediate
/// that has been sign-extended to 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  // 0 .. 127.
  if (Value <= 0x000000000000007FULL)
    return true;
  // -128 .. -1 sign-extended to 64 bits (upper bound is UINT64_MAX).
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// Returns true if \p Value is representable as a 32-bit signed immediate
/// that has been sign-extended to 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  // 0 .. INT32_MAX.
  if (Value <= 0x000000007FFFFFFFULL)
    return true;
  // INT32_MIN .. -1 sign-extended to 64 bits (upper bound is UINT64_MAX).
  return Value >= 0xFFFFFFFF80000000ULL;
}
168 /// X86Operand - Instances of this class represent a parsed X86 machine
170 struct X86Operand : public MCParsedAsmOperand {
178 SMLoc StartLoc, EndLoc;
212 X86Operand(KindTy K, SMLoc Start, SMLoc End)
213 : Kind(K), StartLoc(Start), EndLoc(End) {}
215 StringRef getSymName() { return SymName; }
217 /// getStartLoc - Get the location of the first token of this operand.
218 SMLoc getStartLoc() const { return StartLoc; }
219 /// getEndLoc - Get the location of the last token of this operand.
220 SMLoc getEndLoc() const { return EndLoc; }
221 /// getLocRange - Get the range between the first and last token of this
223 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
224 /// getOffsetOfLoc - Get the location of the offset operator.
225 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
227 virtual void print(raw_ostream &OS) const {}
229 StringRef getToken() const {
230 assert(Kind == Token && "Invalid access!");
231 return StringRef(Tok.Data, Tok.Length);
233 void setTokenValue(StringRef Value) {
234 assert(Kind == Token && "Invalid access!");
235 Tok.Data = Value.data();
236 Tok.Length = Value.size();
239 unsigned getReg() const {
240 assert(Kind == Register && "Invalid access!");
244 const MCExpr *getImm() const {
245 assert(Kind == Immediate && "Invalid access!");
249 const MCExpr *getMemDisp() const {
250 assert(Kind == Memory && "Invalid access!");
253 unsigned getMemSegReg() const {
254 assert(Kind == Memory && "Invalid access!");
257 unsigned getMemBaseReg() const {
258 assert(Kind == Memory && "Invalid access!");
261 unsigned getMemIndexReg() const {
262 assert(Kind == Memory && "Invalid access!");
265 unsigned getMemScale() const {
266 assert(Kind == Memory && "Invalid access!");
270 bool isToken() const {return Kind == Token; }
272 bool isImm() const { return Kind == Immediate; }
274 bool isImmSExti16i8() const {
278 // If this isn't a constant expr, just assume it fits and let relaxation
280 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
284 // Otherwise, check the value is in a range that makes sense for this
286 return isImmSExti16i8Value(CE->getValue());
288 bool isImmSExti32i8() const {
292 // If this isn't a constant expr, just assume it fits and let relaxation
294 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
298 // Otherwise, check the value is in a range that makes sense for this
300 return isImmSExti32i8Value(CE->getValue());
302 bool isImmZExtu32u8() const {
306 // If this isn't a constant expr, just assume it fits and let relaxation
308 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
312 // Otherwise, check the value is in a range that makes sense for this
314 return isImmZExtu32u8Value(CE->getValue());
316 bool isImmSExti64i8() const {
320 // If this isn't a constant expr, just assume it fits and let relaxation
322 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
326 // Otherwise, check the value is in a range that makes sense for this
328 return isImmSExti64i8Value(CE->getValue());
330 bool isImmSExti64i32() const {
334 // If this isn't a constant expr, just assume it fits and let relaxation
336 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
340 // Otherwise, check the value is in a range that makes sense for this
342 return isImmSExti64i32Value(CE->getValue());
345 bool isOffsetOf() const {
346 return OffsetOfLoc.getPointer();
349 bool needAddressOf() const {
353 bool isMem() const { return Kind == Memory; }
354 bool isMem8() const {
355 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
357 bool isMem16() const {
358 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
360 bool isMem32() const {
361 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
363 bool isMem64() const {
364 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
366 bool isMem80() const {
367 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
369 bool isMem128() const {
370 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
372 bool isMem256() const {
373 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
376 bool isMemVX32() const {
377 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
378 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
380 bool isMemVY32() const {
381 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
382 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
384 bool isMemVX64() const {
385 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
386 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
388 bool isMemVY64() const {
389 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
390 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
393 bool isAbsMem() const {
394 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
395 !getMemIndexReg() && getMemScale() == 1;
398 bool isReg() const { return Kind == Register; }
400 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
401 // Add as immediates when possible.
402 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
403 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
405 Inst.addOperand(MCOperand::CreateExpr(Expr));
408 void addRegOperands(MCInst &Inst, unsigned N) const {
409 assert(N == 1 && "Invalid number of operands!");
410 Inst.addOperand(MCOperand::CreateReg(getReg()));
413 void addImmOperands(MCInst &Inst, unsigned N) const {
414 assert(N == 1 && "Invalid number of operands!");
415 addExpr(Inst, getImm());
418 void addMem8Operands(MCInst &Inst, unsigned N) const {
419 addMemOperands(Inst, N);
421 void addMem16Operands(MCInst &Inst, unsigned N) const {
422 addMemOperands(Inst, N);
424 void addMem32Operands(MCInst &Inst, unsigned N) const {
425 addMemOperands(Inst, N);
427 void addMem64Operands(MCInst &Inst, unsigned N) const {
428 addMemOperands(Inst, N);
430 void addMem80Operands(MCInst &Inst, unsigned N) const {
431 addMemOperands(Inst, N);
433 void addMem128Operands(MCInst &Inst, unsigned N) const {
434 addMemOperands(Inst, N);
436 void addMem256Operands(MCInst &Inst, unsigned N) const {
437 addMemOperands(Inst, N);
439 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
440 addMemOperands(Inst, N);
442 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
443 addMemOperands(Inst, N);
445 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
446 addMemOperands(Inst, N);
448 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
449 addMemOperands(Inst, N);
452 void addMemOperands(MCInst &Inst, unsigned N) const {
453 assert((N == 5) && "Invalid number of operands!");
454 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
455 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
456 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
457 addExpr(Inst, getMemDisp());
458 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
461 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
462 assert((N == 1) && "Invalid number of operands!");
463 // Add as immediates when possible.
464 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
465 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
467 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
470 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
471 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
472 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
473 Res->Tok.Data = Str.data();
474 Res->Tok.Length = Str.size();
478 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
479 bool AddressOf = false,
480 SMLoc OffsetOfLoc = SMLoc(),
481 StringRef SymName = StringRef()) {
482 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
483 Res->Reg.RegNo = RegNo;
484 Res->AddressOf = AddressOf;
485 Res->OffsetOfLoc = OffsetOfLoc;
486 Res->SymName = SymName;
490 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
491 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
496 /// Create an absolute memory operand.
497 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
499 StringRef SymName = StringRef()) {
500 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
502 Res->Mem.Disp = Disp;
503 Res->Mem.BaseReg = 0;
504 Res->Mem.IndexReg = 0;
506 Res->Mem.Size = Size;
507 Res->SymName = SymName;
508 Res->AddressOf = false;
512 /// Create a generalized memory operand.
513 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
514 unsigned BaseReg, unsigned IndexReg,
515 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
517 StringRef SymName = StringRef()) {
518 // We should never just have a displacement, that should be parsed as an
519 // absolute memory operand.
520 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
522 // The scale should always be one of {1,2,4,8}.
523 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
525 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
526 Res->Mem.SegReg = SegReg;
527 Res->Mem.Disp = Disp;
528 Res->Mem.BaseReg = BaseReg;
529 Res->Mem.IndexReg = IndexReg;
530 Res->Mem.Scale = Scale;
531 Res->Mem.Size = Size;
532 Res->SymName = SymName;
533 Res->AddressOf = false;
538 } // end anonymous namespace.
540 bool X86AsmParser::isSrcOp(X86Operand &Op) {
541 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
543 return (Op.isMem() &&
544 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
545 isa<MCConstantExpr>(Op.Mem.Disp) &&
546 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
547 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
550 bool X86AsmParser::isDstOp(X86Operand &Op) {
551 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
554 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
555 isa<MCConstantExpr>(Op.Mem.Disp) &&
556 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
557 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
560 bool X86AsmParser::ParseRegister(unsigned &RegNo,
561 SMLoc &StartLoc, SMLoc &EndLoc) {
563 const AsmToken &PercentTok = Parser.getTok();
564 StartLoc = PercentTok.getLoc();
566 // If we encounter a %, ignore it. This code handles registers with and
567 // without the prefix, unprefixed registers can occur in cfi directives.
568 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
569 Parser.Lex(); // Eat percent token.
571 const AsmToken &Tok = Parser.getTok();
572 EndLoc = Tok.getEndLoc();
574 if (Tok.isNot(AsmToken::Identifier)) {
575 if (isParsingIntelSyntax()) return true;
576 return Error(StartLoc, "invalid register name",
577 SMRange(StartLoc, EndLoc));
580 RegNo = MatchRegisterName(Tok.getString());
582 // If the match failed, try the register name as lowercase.
584 RegNo = MatchRegisterName(Tok.getString().lower());
586 if (!is64BitMode()) {
587 // FIXME: This should be done using Requires<In32BitMode> and
588 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
590 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
592 if (RegNo == X86::RIZ ||
593 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
594 X86II::isX86_64NonExtLowByteReg(RegNo) ||
595 X86II::isX86_64ExtendedReg(RegNo))
596 return Error(StartLoc, "register %"
597 + Tok.getString() + " is only available in 64-bit mode",
598 SMRange(StartLoc, EndLoc));
601 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
602 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
604 Parser.Lex(); // Eat 'st'
606 // Check to see if we have '(4)' after %st.
607 if (getLexer().isNot(AsmToken::LParen))
612 const AsmToken &IntTok = Parser.getTok();
613 if (IntTok.isNot(AsmToken::Integer))
614 return Error(IntTok.getLoc(), "expected stack index");
615 switch (IntTok.getIntVal()) {
616 case 0: RegNo = X86::ST0; break;
617 case 1: RegNo = X86::ST1; break;
618 case 2: RegNo = X86::ST2; break;
619 case 3: RegNo = X86::ST3; break;
620 case 4: RegNo = X86::ST4; break;
621 case 5: RegNo = X86::ST5; break;
622 case 6: RegNo = X86::ST6; break;
623 case 7: RegNo = X86::ST7; break;
624 default: return Error(IntTok.getLoc(), "invalid stack index");
627 if (getParser().Lex().isNot(AsmToken::RParen))
628 return Error(Parser.getTok().getLoc(), "expected ')'");
630 EndLoc = Parser.getTok().getEndLoc();
631 Parser.Lex(); // Eat ')'
635 EndLoc = Parser.getTok().getEndLoc();
637 // If this is "db[0-7]", match it as an alias
639 if (RegNo == 0 && Tok.getString().size() == 3 &&
640 Tok.getString().startswith("db")) {
641 switch (Tok.getString()[2]) {
642 case '0': RegNo = X86::DR0; break;
643 case '1': RegNo = X86::DR1; break;
644 case '2': RegNo = X86::DR2; break;
645 case '3': RegNo = X86::DR3; break;
646 case '4': RegNo = X86::DR4; break;
647 case '5': RegNo = X86::DR5; break;
648 case '6': RegNo = X86::DR6; break;
649 case '7': RegNo = X86::DR7; break;
653 EndLoc = Parser.getTok().getEndLoc();
654 Parser.Lex(); // Eat it.
660 if (isParsingIntelSyntax()) return true;
661 return Error(StartLoc, "invalid register name",
662 SMRange(StartLoc, EndLoc));
665 Parser.Lex(); // Eat identifier token.
669 X86Operand *X86AsmParser::ParseOperand() {
670 if (isParsingIntelSyntax())
671 return ParseIntelOperand();
672 return ParseATTOperand();
675 /// getIntelMemOperandSize - Return intel memory operand size.
676 static unsigned getIntelMemOperandSize(StringRef OpStr) {
677 unsigned Size = StringSwitch<unsigned>(OpStr)
678 .Cases("BYTE", "byte", 8)
679 .Cases("WORD", "word", 16)
680 .Cases("DWORD", "dword", 32)
681 .Cases("QWORD", "qword", 64)
682 .Cases("XWORD", "xword", 80)
683 .Cases("XMMWORD", "xmmword", 128)
684 .Cases("YMMWORD", "ymmword", 256)
689 enum InfixCalculatorTok {
699 static const char OpPrecedence[] = {
710 class InfixCalculator {
711 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
712 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
713 SmallVector<ICToken, 4> PostfixStack;
716 int64_t popOperand() {
717 assert (!PostfixStack.empty() && "Poped an empty stack!");
718 ICToken Op = PostfixStack.pop_back_val();
719 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
720 && "Expected and immediate or register!");
723 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
724 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
725 "Unexpected operand!");
726 PostfixStack.push_back(std::make_pair(Op, Val));
729 void popOperator() { InfixOperatorStack.pop_back_val(); }
730 void pushOperator(InfixCalculatorTok Op) {
731 // Push the new operator if the stack is empty.
732 if (InfixOperatorStack.empty()) {
733 InfixOperatorStack.push_back(Op);
737 // Push the new operator if it has a higher precedence than the operator on
738 // the top of the stack or the operator on the top of the stack is a left
740 unsigned Idx = InfixOperatorStack.size() - 1;
741 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
742 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
743 InfixOperatorStack.push_back(Op);
747 // The operator on the top of the stack has higher precedence than the
749 unsigned ParenCount = 0;
751 // Nothing to process.
752 if (InfixOperatorStack.empty())
755 Idx = InfixOperatorStack.size() - 1;
756 StackOp = InfixOperatorStack[Idx];
757 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
760 // If we have an even parentheses count and we see a left parentheses,
761 // then stop processing.
762 if (!ParenCount && StackOp == IC_LPAREN)
765 if (StackOp == IC_RPAREN) {
767 InfixOperatorStack.pop_back_val();
768 } else if (StackOp == IC_LPAREN) {
770 InfixOperatorStack.pop_back_val();
772 InfixOperatorStack.pop_back_val();
773 PostfixStack.push_back(std::make_pair(StackOp, 0));
776 // Push the new operator.
777 InfixOperatorStack.push_back(Op);
780 // Push any remaining operators onto the postfix stack.
781 while (!InfixOperatorStack.empty()) {
782 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
783 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
784 PostfixStack.push_back(std::make_pair(StackOp, 0));
787 if (PostfixStack.empty())
790 SmallVector<ICToken, 16> OperandStack;
791 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
792 ICToken Op = PostfixStack[i];
793 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
794 OperandStack.push_back(Op);
796 assert (OperandStack.size() > 1 && "Too few operands.");
798 ICToken Op2 = OperandStack.pop_back_val();
799 ICToken Op1 = OperandStack.pop_back_val();
802 report_fatal_error("Unexpected operator!");
805 Val = Op1.second + Op2.second;
806 OperandStack.push_back(std::make_pair(IC_IMM, Val));
809 Val = Op1.second - Op2.second;
810 OperandStack.push_back(std::make_pair(IC_IMM, Val));
813 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
814 "Multiply operation with an immediate and a register!");
815 Val = Op1.second * Op2.second;
816 OperandStack.push_back(std::make_pair(IC_IMM, Val));
819 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
820 "Divide operation with an immediate and a register!");
821 assert (Op2.second != 0 && "Division by zero!");
822 Val = Op1.second / Op2.second;
823 OperandStack.push_back(std::make_pair(IC_IMM, Val));
828 assert (OperandStack.size() == 1 && "Expected a single result.");
829 return OperandStack.pop_back_val().second;
833 enum IntelBracExprState {
850 class IntelBracExprStateMachine {
851 IntelBracExprState State;
852 unsigned BaseReg, IndexReg, TmpReg, Scale;
856 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
857 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp){}
859 unsigned getBaseReg() { return BaseReg; }
860 unsigned getIndexReg() { return IndexReg; }
861 unsigned getScale() { return Scale; }
862 int64_t getDisp() { return Disp + IC.execute(); }
863 bool isValidEndState() { return State == IBES_RBRAC; }
873 IC.pushOperator(IC_PLUS);
877 // If we already have a BaseReg, then assume this is the IndexReg with a
882 assert (!IndexReg && "BaseReg/IndexReg already set!");
886 IC.pushOperator(IC_PLUS);
897 IC.pushOperand(IC_IMM);
901 IC.pushOperator(IC_MINUS);
905 // If we already have a BaseReg, then assume this is the IndexReg with a
910 assert (!IndexReg && "BaseReg/IndexReg already set!");
914 IC.pushOperator(IC_MINUS);
918 void onRegister(unsigned Reg) {
925 State = IBES_REGISTER;
927 IC.pushOperand(IC_REGISTER);
929 case IBES_INTEGER_STAR:
930 assert (!IndexReg && "IndexReg already set!");
931 State = IBES_INTEGER;
933 Scale = IC.popOperand();
934 IC.pushOperand(IC_IMM);
946 State = IBES_INTEGER;
947 IC.pushOperand(IC_IMM);
951 void onInteger(int64_t TmpInt) {
961 case IBES_INTEGER_STAR:
962 State = IBES_INTEGER;
963 IC.pushOperand(IC_IMM, TmpInt);
965 case IBES_REGISTER_STAR:
966 assert (!IndexReg && "IndexReg already set!");
967 State = IBES_INTEGER;
980 State = IBES_INTEGER_STAR;
981 IC.pushOperator(IC_MULTIPLY);
984 State = IBES_REGISTER_STAR;
985 IC.pushOperator(IC_MULTIPLY);
988 State = IBES_MULTIPLY;
989 IC.pushOperator(IC_MULTIPLY);
1000 IC.pushOperator(IC_DIVIDE);
1011 IC.pushOperator(IC_PLUS);
1026 // If we already have a BaseReg, then assume this is the IndexReg with a
1031 assert (!IndexReg && "BaseReg/IndexReg already set!");
1047 case IBES_INTEGER_STAR:
1049 State = IBES_LPAREN;
1050 IC.pushOperator(IC_LPAREN);
1066 State = IBES_RPAREN;
1067 IC.pushOperator(IC_RPAREN);
1073 X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
1074 SMLoc End, SMLoc SizeDirLoc,
1075 unsigned Size, StringRef SymName) {
1076 bool NeedSizeDir = false;
1077 bool IsVarDecl = false;
1079 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1080 const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other than an
1083 // FIXME: Pass a valid SMLoc.
1084 unsigned tLength, tSize, tType;
1085 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
1088 Size = tType * 8; // Size is in terms of bits in this context.
1089 NeedSizeDir = Size > 0;
1093 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1094 // reference. We need an 'r' constraint here, so we need to create register
1095 // operand to ensure proper matching. Just pick a GPR based on the size of
1098 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1099 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, SMLoc(),
1104 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
1107 // When parsing inline assembly we set the base register to a non-zero value
1108 // as we don't know the actual value at this time. This is necessary to
1109 // get the matching correct in some cases.
1110 return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
1111 /*Scale*/1, Start, End, Size, SymName);
1114 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
1118 const AsmToken &Tok = Parser.getTok();
1119 SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
1122 if (getLexer().isNot(AsmToken::LBrac))
1123 return ErrorOperand(Start, "Expected '[' token!");
1126 unsigned TmpReg = 0;
1128 // Try to handle '[' 'Symbol' ']'
1129 if (getLexer().is(AsmToken::Identifier)) {
1130 if (ParseRegister(TmpReg, Start, End)) {
1132 SMLoc IdentStart = Tok.getLoc();
1133 if (getParser().parseExpression(Disp, End))
1136 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
1139 if (getLexer().isNot(AsmToken::RBrac))
1140 return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
1142 unsigned Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1143 StringRef SymName(IdentStart.getPointer(), Len);
1144 Parser.Lex(); // Eat ']'
1145 if (!isParsingInlineAsm())
1146 return X86Operand::CreateMem(Disp, Start, End, Size, SymName);
1147 return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size, SymName);
1151 // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
1152 // immediate displacement before the bracketed expression.
1154 IntelBracExprStateMachine SM(Parser, ImmDisp);
1156 // If we parsed a register, then the end loc has already been set and
1157 // the identifier has already been lexed. We also need to update the
1160 SM.onRegister(TmpReg);
1162 const MCExpr *Disp = 0;
1164 bool UpdateLocLex = true;
1166 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
// identifier. Don't try to parse it as a register.
1168 if (Tok.getString().startswith("."))
1171 switch (getLexer().getKind()) {
1173 if (SM.isValidEndState()) {
1177 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1179 case AsmToken::Identifier: {
1180 // This could be a register or a displacement expression.
1181 if(!ParseRegister(TmpReg, Start, End)) {
1182 SM.onRegister(TmpReg);
1183 UpdateLocLex = false;
1185 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1187 UpdateLocLex = false;
1190 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1192 case AsmToken::Integer:
1193 if (isParsingInlineAsm())
1194 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1196 SM.onInteger(Tok.getIntVal());
1198 case AsmToken::Plus: SM.onPlus(); break;
1199 case AsmToken::Minus: SM.onMinus(); break;
1200 case AsmToken::Star: SM.onStar(); break;
1201 case AsmToken::Slash: SM.onDivide(); break;
1202 case AsmToken::LBrac: SM.onLBrac(); break;
1203 case AsmToken::RBrac: SM.onRBrac(); break;
1204 case AsmToken::LParen: SM.onLParen(); break;
1205 case AsmToken::RParen: SM.onRParen(); break;
1207 if (!Done && UpdateLocLex) {
1209 Parser.Lex(); // Consume the token.
1214 Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
1216 // Parse the dot operator (e.g., [ebx].foo.bar).
1217 if (Tok.getString().startswith(".")) {
1218 SmallString<64> Err;
1219 const MCExpr *NewDisp;
1220 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1221 return ErrorOperand(Tok.getLoc(), Err);
1223 End = Parser.getTok().getEndLoc();
1224 Parser.Lex(); // Eat the field.
1229 int BaseReg = SM.getBaseReg();
1230 int IndexReg = SM.getIndexReg();
1233 if (!BaseReg && !IndexReg) {
1235 return X86Operand::CreateMem(Disp, Start, End, Size);
1237 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1240 int Scale = SM.getScale();
1241 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1245 // Inline assembly may use variable names with namespace alias qualifiers.
1246 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1247 SMLoc &IdentStart) {
1248 // We should only see Foo::Bar if we're parsing inline assembly.
1249 if (!isParsingInlineAsm())
1252 // If we don't see a ':' then there can't be a qualifier.
1253 if (getLexer().isNot(AsmToken::Colon))
1257 const AsmToken &Tok = Parser.getTok();
1259 switch (getLexer().getKind()) {
1263 case AsmToken::Colon:
1264 getLexer().Lex(); // Consume ':'.
1265 if (getLexer().isNot(AsmToken::Colon))
1266 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1267 getLexer().Lex(); // Consume second ':'.
1268 if (getLexer().isNot(AsmToken::Identifier))
1269 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1271 case AsmToken::Identifier:
1272 getLexer().Lex(); // Consume the identifier.
1276 size_t Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1277 StringRef Identifier(IdentStart.getPointer(), Len);
1278 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1279 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1280 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1284 /// ParseIntelMemOperand - Parse intel style memory operand.
1285 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1288 const AsmToken &Tok = Parser.getTok();
1291 unsigned Size = getIntelMemOperandSize(Tok.getString());
1294 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1295 "Unexpected token!");
1299 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1300 if (getLexer().is(AsmToken::Integer)) {
1301 const AsmToken &IntTok = Parser.getTok();
1302 if (isParsingInlineAsm())
1303 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1305 uint64_t ImmDisp = IntTok.getIntVal();
1306 Parser.Lex(); // Eat the integer.
1307 if (getLexer().isNot(AsmToken::LBrac))
1308 return ErrorOperand(Start, "Expected '[' token!");
1309 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1312 if (getLexer().is(AsmToken::LBrac))
1313 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1315 if (!ParseRegister(SegReg, Start, End)) {
// Handle SegReg : [ ... ]
1317 if (getLexer().isNot(AsmToken::Colon))
1318 return ErrorOperand(Start, "Expected ':' token!");
1319 Parser.Lex(); // Eat :
1320 if (getLexer().isNot(AsmToken::LBrac))
1321 return ErrorOperand(Start, "Expected '[' token!");
1322 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1325 const MCExpr *Disp = 0;
1326 SMLoc IdentStart = Tok.getLoc();
1327 if (getParser().parseExpression(Disp, End))
1330 if (!isParsingInlineAsm())
1331 return X86Operand::CreateMem(Disp, Start, End, Size);
1333 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
1336 unsigned Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1337 StringRef SymName(IdentStart.getPointer(), Len);
1338 return CreateMemForInlineAsm(Disp, Start, End, Start, Size, SymName);
1341 /// Parse the '.' operator.
// Combines a constant displacement \p Disp with the offset named by the
// current token (".<imm>" or, in inline asm, ".<field>") and stores the sum
// as a new constant expression in \p *NewDisp. Failure descriptions are
// written to \p Err.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the function returns true on error - confirm.
1342 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1343 const MCExpr **NewDisp,
1344 SmallString<64> &Err) {
// Note: this takes a copy of the current token rather than a reference.
1345 AsmToken Tok = *&Parser.getTok();
1346 uint64_t OrigDispVal, DotDispVal;
1348 // FIXME: Handle non-constant expressions.
1349 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1350 OrigDispVal = OrigDisp->getValue();
1352 Err = "Non-constant offsets are not supported!";
// Drop the leading character of the token text (the '.').
1357 StringRef DotDispStr = Tok.getString().drop_front(1);
1359 // .Imm gets lexed as a real.
1360 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here.
1362 DotDispStr.getAsInteger(10, DotDisp);
1363 DotDispVal = DotDisp.getZExtValue();
1364 } else if (Tok.is(AsmToken::Identifier)) {
1365 // We should only see an identifier when parsing the original inline asm.
1366 // The front-end should rewrite this in terms of immediates.
1367 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split "Base.Member" at the first '.' and ask the front-end for the offset.
1370 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1371 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1373 Err = "Unable to lookup field reference!";
1376 DotDispVal = DotDisp;
1378 Err = "Unexpected token type!";
// For inline asm field references, record a rewrite covering the field text.
1382 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1383 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1384 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned here.
1385 unsigned Val = OrigDispVal + DotDispVal;
1386 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1390 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1394 /// Parse the 'offset' operator. This operator is used to specify the
1395 /// location rather than the content of a variable.
// Uses InstInfo->AsmRewrites unconditionally; the visible caller
// (ParseIntelOperand) only invokes this while parsing inline asm.
1396 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1397 const AsmToken &Tok = Parser.getTok();
1398 SMLoc OffsetOfLoc = Tok.getLoc();
1399 Parser.Lex(); // Eat offset.
1400 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Parse the operand of 'offset' as a primary expression.
1403 SMLoc Start = Tok.getLoc(), End;
1404 if (getParser().parsePrimaryExpr(Val, End))
1405 return ErrorOperand(Start, "Unable to parse expression!");
1407 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character - confirm.
1408 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1410 // The offset operator will have an 'r' constraint, thus we need to create
1411 // register operand to ensure proper matching. Just pick a GPR based on
1412 // the size of a pointer.
1413 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
// SymName is the source text of the parsed expression (Start up to End).
1414 unsigned Len = End.getPointer() - Start.getPointer();
1415 StringRef SymName(Start.getPointer(), Len);
1416 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1417 OffsetOfLoc, SymName);
// Selector passed to ParseIntelOperator(); that function switches on
// IOK_LENGTH, IOK_SIZE and IOK_TYPE. The enumerators themselves are not
// visible in this excerpt.
1420 enum IntelOperatorKind {
1426 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1427 /// returns the number of elements in an array. It returns the value 1 for
1428 /// non-array variables. The SIZE operator returns the size of a C or C++
1429 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1430 /// TYPE operator returns the size of a C or C++ type or variable. If the
1431 /// variable is an array, TYPE returns the size of a single element.
// \p OpKind selects which of the three values becomes the resulting
// immediate. The operator and its argument are recorded as an AOK_Imm
// rewrite. The visible caller (ParseIntelOperand) only invokes this while
// parsing inline asm.
1432 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1433 const AsmToken &Tok = Parser.getTok();
1434 SMLoc TypeLoc = Tok.getLoc();
1435 Parser.Lex(); // Eat operator.
1436 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Parse the operator's argument as a primary expression.
1439 SMLoc Start = Tok.getLoc(), End;
1440 if (getParser().parsePrimaryExpr(Val, End))
// Filled in by the front-end lookup below when Val is a symbol reference.
1443 unsigned Length = 0, Size = 0, Type = 0;
1444 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1445 const MCSymbol &Sym = SymRef->getSymbol();
1446 // FIXME: The SemaLookup will fail if the name is anything other than an
1448 // FIXME: Pass a valid SMLoc.
1450 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1451 Size, Type, IsVarDecl))
1452 return ErrorOperand(Start, "Unable to lookup expr!");
// Pick the value requested by OpKind.
1456 default: llvm_unreachable("Unexpected operand kind!");
1457 case IOK_LENGTH: CVal = Length; break;
1458 case IOK_SIZE: CVal = Size; break;
1459 case IOK_TYPE: CVal = Type; break;
1462 // Rewrite the type operator and the C or C++ type or variable in terms of an
1463 // immediate. E.g. TYPE foo -> $$4
// The rewrite spans from the operator keyword through the end of its argument.
1464 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1465 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1467 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1468 return X86Operand::CreateImm(Imm, Start, End);
// Parse a single operand in Intel syntax. Tries, in order: the inline-asm
// only operators (offset/length/size/type), an immediate (optionally followed
// by a bracketed expression), a register (optionally a segment override for a
// memory operand), and finally a memory operand.
1471 X86Operand *X86AsmParser::ParseIntelOperand() {
1472 SMLoc Start = Parser.getTok().getLoc(), End;
1473 StringRef AsmTokStr = Parser.getTok().getString();
1475 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1476 if (isParsingInlineAsm()) {
1477 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1478 return ParseIntelOffsetOfOperator();
1479 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1480 return ParseIntelOperator(IOK_LENGTH);
1481 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1482 return ParseIntelOperator(IOK_SIZE);
1483 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1484 return ParseIntelOperator(IOK_TYPE);
// Immediate: starts with an integer, a real, or a minus sign.
1488 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1489 getLexer().is(AsmToken::Minus)) {
// Remember whether the first token was an integer before it is consumed.
1491 bool isInteger = getLexer().is(AsmToken::Integer);
// parseExpression() returns false on success.
1492 if (!getParser().parseExpression(Val, End)) {
1493 if (isParsingInlineAsm())
1494 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
// Not followed by '[': this is a plain immediate operand.
1496 if (getLexer().isNot(AsmToken::LBrac))
1497 return X86Operand::CreateImm(Val, Start, End);
1499 // Only positive immediates are valid.
1501 Error(Parser.getTok().getLoc(), "expected a positive immediate "
1502 "displacement before bracketed expr.");
1506 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): dyn_cast yields null when Val is not an MCConstantExpr, and
// the result is dereferenced without a check - confirm Val is always constant
// here. A zero displacement does not take this return.
1507 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1508 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
// Register: ParseRegister() returns false on success.
1514 if (!ParseRegister(RegNo, Start, End)) {
1515 // If this is a segment register followed by a ':', then this is the start
1516 // of a memory reference, otherwise this is a normal register reference.
1517 if (getLexer().isNot(AsmToken::Colon))
1518 return X86Operand::CreateReg(RegNo, Start, End);
1520 getParser().Lex(); // Eat the colon.
1521 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
// Fallback: a memory operand with no segment register or displacement.
1525 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
// Parse a single operand in AT&T syntax, dispatching on the current token
// kind: '%' introduces a register (or a segment-prefixed memory operand),
// '$' introduces an immediate, and the remaining visible branch parses a
// memory operand.
1528 X86Operand *X86AsmParser::ParseATTOperand() {
1529 switch (getLexer().getKind()) {
1531 // Parse a memory operand with no segment register.
1532 return ParseMemOperand(0, Parser.getTok().getLoc());
1533 case AsmToken::Percent: {
1534 // Read the register.
// ParseRegister() returns true on failure; a null operand signals the error.
1537 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1538 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1539 Error(Start, "%eiz and %riz can only be used as index registers",
1540 SMRange(Start, End));
1544 // If this is a segment register followed by a ':', then this is the start
1545 // of a memory reference, otherwise this is a normal register reference.
1546 if (getLexer().isNot(AsmToken::Colon))
1547 return X86Operand::CreateReg(RegNo, Start, End);
1549 getParser().Lex(); // Eat the colon.
1550 return ParseMemOperand(RegNo, Start);
1552 case AsmToken::Dollar: {
1553 // $42 -> immediate.
1554 SMLoc Start = Parser.getTok().getLoc(), End;
1557 if (getParser().parseExpression(Val, End))
1559 return X86Operand::CreateImm(Val, Start, End);
1564 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1565 /// has already been parsed if present.
// \p SegReg is the already-parsed segment register (0 if none) and
// \p MemStart the location where the operand began. Several failure paths
// visible below return a null operand after reporting through Error().
1566 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1568 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1569 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1570 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1572 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1573 if (getLexer().isNot(AsmToken::LParen)) {
1575 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1577 // After parsing the base expression we could either have a parenthesized
1578 // memory address or not. If not, return now. If so, eat the (.
1579 if (getLexer().isNot(AsmToken::LParen)) {
1580 // Unless we have a segment register, treat this as an immediate.
1582 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1583 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1589 // Okay, we have a '('. We don't know if this is an expression or not, but
1590 // so we have to eat the ( to see beyond it.
1591 SMLoc LParenLoc = Parser.getTok().getLoc();
1592 Parser.Lex(); // Eat the '('.
1594 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1595 // Nothing to do here, fall into the code below with the '(' part of the
1596 // memory operand consumed.
1600 // It must be a parenthesized expression, parse it now.
1601 if (getParser().parseParenExpression(Disp, ExprEnd))
1604 // After parsing the base expression we could either have a parenthesized
1605 // memory address or not. If not, return now. If so, eat the (.
1606 if (getLexer().isNot(AsmToken::LParen)) {
1607 // Unless we have a segment register, treat this as an immediate.
1609 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1610 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1618 // If we reached here, then we just ate the ( of the memory operand. Process
1619 // the rest of the memory operand.
1620 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
// Optional base register.
1623 if (getLexer().is(AsmToken::Percent)) {
1624 SMLoc StartLoc, EndLoc;
1625 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1626 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1627 Error(StartLoc, "eiz and riz can only be used as index registers",
1628 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1633 if (getLexer().is(AsmToken::Comma)) {
1634 Parser.Lex(); // Eat the comma.
1635 IndexLoc = Parser.getTok().getLoc();
1637 // Following the comma we should have either an index register, or a scale
1638 // value. We don't support the latter form, but we want to parse it
1641 // Note that even though it would be completely consistent to support syntax
1642 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1643 if (getLexer().is(AsmToken::Percent)) {
// NOTE(review): the same SMLoc object is passed for both the start and end
// location arguments here.
1645 if (ParseRegister(IndexReg, L, L)) return 0;
1647 if (getLexer().isNot(AsmToken::RParen)) {
1648 // Parse the scale amount:
1649 // ::= ',' [scale-expression]
1650 if (getLexer().isNot(AsmToken::Comma)) {
1651 Error(Parser.getTok().getLoc(),
1652 "expected comma in scale expression");
1655 Parser.Lex(); // Eat the comma.
1657 if (getLexer().isNot(AsmToken::RParen)) {
1658 SMLoc Loc = Parser.getTok().getLoc();
1661 if (getParser().parseAbsoluteExpression(ScaleVal)){
1662 Error(Loc, "expected scale expression");
1666 // Validate the scale amount.
1667 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1668 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1671 Scale = (unsigned)ScaleVal;
1674 } else if (getLexer().isNot(AsmToken::RParen)) {
1675 // A scale amount without an index is ignored.
1677 SMLoc Loc = Parser.getTok().getLoc();
1680 if (getParser().parseAbsoluteExpression(Value))
1684 Warning(Loc, "scale factor without index register is ignored");
1689 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1690 if (getLexer().isNot(AsmToken::RParen)) {
1691 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1694 SMLoc MemEnd = Parser.getTok().getEndLoc();
1695 Parser.Lex(); // Eat the ')'.
1697 // If we have both a base register and an index register make sure they are
1698 // both 64-bit or 32-bit registers.
1699 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// NOTE(review): both checks below also fire for a GR16 index register, but
// the diagnostics describe the index as 32-bit / 64-bit respectively.
1700 if (BaseReg != 0 && IndexReg != 0) {
1701 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1702 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1703 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1704 IndexReg != X86::RIZ) {
1705 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1708 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1709 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1710 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1711 IndexReg != X86::EIZ){
1712 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1717 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement. The mnemonic \p Name (possibly normalized
// into PatchedName) is pushed onto \p Operands as a token, followed by the
// parsed operands. Afterwards a series of operand-list rewrites canonicalize
// legacy spellings (in/out with "(%dx)", string instructions with explicit
// operands, shifts/rotates by 1, "int $3").
// NOTE(review): the embedded line numbers skip, so the return type line and
// several statements/braces are not visible in this excerpt.
1722 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1723 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1725 StringRef PatchedName = Name;
1727 // FIXME: Hack to recognize setneb as setne.
// Strips a trailing 'b' from set* mnemonics other than setb/setnb.
1728 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1729 PatchedName != "setb" && PatchedName != "setnb")
1730 PatchedName = PatchedName.substr(0, Name.size()-1);
1732 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The comparison code embedded in the mnemonic becomes an extra immediate
// operand (ExtraImmOp) and the mnemonic is reduced to the base [v]cmpXX form.
1733 const MCExpr *ExtraImmOp = 0;
1734 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1735 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1736 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1737 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character after the "cmp"/"vcmp" prefix.
1738 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the text between the prefix and the two-character suffix to its code.
1739 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1740 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1744 .Case("unord", 0x03)
1749 /* AVX only from here */
1750 .Case("eq_uq", 0x08)
1753 .Case("false", 0x0B)
1754 .Case("neq_oq", 0x0C)
1758 .Case("eq_os", 0x10)
1759 .Case("lt_oq", 0x11)
1760 .Case("le_oq", 0x12)
1761 .Case("unord_s", 0x13)
1762 .Case("neq_us", 0x14)
1763 .Case("nlt_uq", 0x15)
1764 .Case("nle_uq", 0x16)
1765 .Case("ord_s", 0x17)
1766 .Case("eq_us", 0x18)
1767 .Case("nge_uq", 0x19)
1768 .Case("ngt_uq", 0x1A)
1769 .Case("false_os", 0x1B)
1770 .Case("neq_os", 0x1C)
1771 .Case("ge_oq", 0x1D)
1772 .Case("gt_oq", 0x1E)
1773 .Case("true_us", 0x1F)
// ~0U marks "no code recognized"; codes of 8 and above are accepted only for
// the vcmp* (AVX) mnemonics.
1775 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1776 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1777 getParser().getContext());
1778 if (PatchedName.endswith("ss")) {
1779 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1780 } else if (PatchedName.endswith("sd")) {
1781 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1782 } else if (PatchedName.endswith("ps")) {
1783 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1785 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1786 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
// The mnemonic token is always the first operand.
1791 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate precedes the parsed operands;
// in Intel syntax it is appended after them (see below).
1793 if (ExtraImmOp && !isParsingIntelSyntax())
1794 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1796 // Determine whether this is an instruction prefix.
1798 Name == "lock" || Name == "rep" ||
1799 Name == "repe" || Name == "repz" ||
1800 Name == "repne" || Name == "repnz" ||
1801 Name == "rex64" || Name == "data16";
1804 // This does the actual operand parsing. Don't parse any more if we have a
1805 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1806 // just want to parse the "lock" as the first instruction and the "incl" as
1808 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1810 // Parse '*' modifier.
1811 if (getLexer().is(AsmToken::Star)) {
1812 SMLoc Loc = Parser.getTok().getLoc();
1813 Operands.push_back(X86Operand::CreateToken("*", Loc));
1814 Parser.Lex(); // Eat the star.
1817 // Read the first operand.
// A null operand means parsing failed; the rest of the statement is skipped.
1818 if (X86Operand *Op = ParseOperand())
1819 Operands.push_back(Op);
1821 Parser.eatToEndOfStatement();
1825 while (getLexer().is(AsmToken::Comma)) {
1826 Parser.Lex(); // Eat the comma.
1828 // Parse and remember the operand.
1829 if (X86Operand *Op = ParseOperand())
1830 Operands.push_back(Op);
1832 Parser.eatToEndOfStatement();
1837 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1838 SMLoc Loc = getLexer().getLoc();
1839 Parser.eatToEndOfStatement();
1840 return Error(Loc, "unexpected token in argument list");
1844 if (getLexer().is(AsmToken::EndOfStatement))
1845 Parser.Lex(); // Consume the EndOfStatement
1846 else if (isPrefix && getLexer().is(AsmToken::Slash))
1847 Parser.Lex(); // Consume the prefix separator Slash
1849 if (ExtraImmOp && isParsingIntelSyntax())
1850 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1852 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1853 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1854 // documented form in various unofficial manuals, so a lot of code uses it.
// Matches only a memory operand with no segment, zero displacement, base
// register dx and no index; it is replaced by a plain register operand.
1855 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1856 Operands.size() == 3) {
1857 X86Operand &Op = *(X86Operand*)Operands.back();
1858 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1859 isa<MCConstantExpr>(Op.Mem.Disp) &&
1860 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1861 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1862 SMLoc Loc = Op.getEndLoc();
1863 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1867 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1868 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1869 Operands.size() == 3) {
1870 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1871 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1872 isa<MCConstantExpr>(Op.Mem.Disp) &&
1873 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1874 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1875 SMLoc Loc = Op.getEndLoc();
1876 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1880 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
// When the explicit operands are the implied ones, both are dropped.
1881 if (Name.startswith("ins") && Operands.size() == 3 &&
1882 (Name == "insb" || Name == "insw" || Name == "insl")) {
1883 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1884 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1885 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1886 Operands.pop_back();
1887 Operands.pop_back();
1893 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1894 if (Name.startswith("outs") && Operands.size() == 3 &&
1895 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1896 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1897 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1898 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1899 Operands.pop_back();
1900 Operands.pop_back();
1906 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
// The 'q' form is only considered in 64-bit mode.
1907 if (Name.startswith("movs") && Operands.size() == 3 &&
1908 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1909 (is64BitMode() && Name == "movsq"))) {
1910 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1911 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1912 if (isSrcOp(Op) && isDstOp(Op2)) {
1913 Operands.pop_back();
1914 Operands.pop_back();
1919 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
// The register operand selects the replacement mnemonic (stored in 'ins',
// whose assignments are not visible in this excerpt); a suffixed mnemonic
// must agree with the register, while bare "lods" accepts any of them.
1920 if (Name.startswith("lods") && Operands.size() == 3 &&
1921 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1922 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1923 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1924 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1925 if (isSrcOp(*Op1) && Op2->isReg()) {
1927 unsigned reg = Op2->getReg();
1928 bool isLods = Name == "lods";
1929 if (reg == X86::AL && (isLods || Name == "lodsb"))
1931 else if (reg == X86::AX && (isLods || Name == "lodsw"))
1933 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1935 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
1940 Operands.pop_back();
1941 Operands.pop_back();
1945 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1949 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
// Mirror image of the lods case: here the register is the first operand.
1950 if (Name.startswith("stos") && Operands.size() == 3 &&
1951 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1952 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
1953 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1954 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1955 if (isDstOp(*Op2) && Op1->isReg()) {
1957 unsigned reg = Op1->getReg();
1958 bool isStos = Name == "stos";
1959 if (reg == X86::AL && (isStos || Name == "stosb"))
1961 else if (reg == X86::AX && (isStos || Name == "stosw"))
1963 else if (reg == X86::EAX && (isStos || Name == "stosl"))
1965 else if (reg == X86::RAX && (isStos || Name == "stosq"))
1970 Operands.pop_back();
1971 Operands.pop_back();
1975 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1980 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// A constant shift/rotate count of 1 is dropped. In Intel syntax the count
// is the last operand (Operands[2]); in AT&T syntax it is Operands[1].
1982 if ((Name.startswith("shr") || Name.startswith("sar") ||
1983 Name.startswith("shl") || Name.startswith("sal") ||
1984 Name.startswith("rcl") || Name.startswith("rcr") ||
1985 Name.startswith("rol") || Name.startswith("ror")) &&
1986 Operands.size() == 3) {
1987 if (isParsingIntelSyntax()) {
1989 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
1990 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1991 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1993 Operands.pop_back();
1996 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1997 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1998 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2000 Operands.erase(Operands.begin() + 1);
2005 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2006 // instalias with an immediate operand yet.
2007 if (Name == "int" && Operands.size() == 2) {
2008 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2009 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2010 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2012 Operands.erase(Operands.begin() + 1);
2013 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Build a replacement instruction with opcode \p Opcode whose operands are
// the register \p Reg followed by the original instruction's operand 0 (the
// immediate).
// NOTE(review): isCmp (passed by the callers below) presumably suppresses one
// of the two register operands for compare instructions; the guarding line
// and the final assignment/return are not visible in this excerpt - confirm.
2020 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2023 TmpInst.setOpcode(Opcode);
2025 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2026 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2027 TmpInst.addOperand(Inst.getOperand(0));
// Convert a 16-bit accumulator/immediate instruction to \p Opcode using AX,
// provided operand 0 is an immediate accepted by isImmSExti16i8Value().
// The early-exit statement taken when the check fails is not visible in this
// excerpt.
2032 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2033 bool isCmp = false) {
2034 if (!Inst.getOperand(0).isImm() ||
2035 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2038 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Convert a 32-bit accumulator/immediate instruction to \p Opcode using EAX,
// provided operand 0 is an immediate accepted by isImmSExti32i8Value().
// The early-exit statement taken when the check fails is not visible in this
// excerpt.
2041 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2042 bool isCmp = false) {
2043 if (!Inst.getOperand(0).isImm() ||
2044 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2047 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Convert a 64-bit accumulator/immediate instruction to \p Opcode using RAX,
// provided operand 0 is an immediate accepted by isImmSExti64i8Value().
// The early-exit statement taken when the check fails is not visible in this
// excerpt.
2050 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2051 bool isCmp = false) {
2052 if (!Inst.getOperand(0).isImm() ||
2053 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2056 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match fixup. For the accumulator/immediate opcodes listed below
// (AND/XOR/OR/CMP/ADD/SUB/ADC/SBB in their *16i16, *32i32 and *64i32 forms),
// try to switch to the corresponding *ri8 opcode via the convert* helpers.
// Any other opcode returns false. The caller loops on this function while it
// keeps returning true. \p Ops is not used in the visible code.
2060 processInstruction(MCInst &Inst,
2061 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2062 switch (Inst.getOpcode()) {
2063 default: return false;
2064 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2065 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2066 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2067 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2068 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2069 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2070 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2071 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2072 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The compare forms pass isCmp = true to the helpers.
2073 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2074 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2075 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2076 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2077 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2078 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2079 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2080 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2081 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2082 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2083 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2084 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2085 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2086 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2087 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Forward declaration. MatchAndEmitInstruction calls this with a single-bit
// feature mask and appends the result to its "instruction requires:" message.
2091 static const char *getSubtargetFeatureName(unsigned Val);
// Match the parsed \p Operands (mnemonic token first) against the instruction
// tables and, unless \p MatchingInlineAsm is set, emit the result to \p Out.
// On success \p Opcode receives the matched opcode. If the direct match fails
// on the mnemonic or an operand, the mnemonic is retried with each of up to
// four size suffixes appended; exactly one successful suffix match is
// accepted, several are reported as ambiguous. Diagnostics go through
// Error(), which returns true.
// NOTE(review): the embedded line numbers skip, so the return type line, some
// declarations, case labels and returns are not visible in this excerpt.
2093 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2094 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2095 MCStreamer &Out, unsigned &ErrorInfo,
2096 bool MatchingInlineAsm) {
2097 assert(!Operands.empty() && "Unexpect empty operand list!");
2098 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2099 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2100 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2102 // First, handle aliases that expand to multiple instructions.
2103 // FIXME: This should be replaced with a real .td file alias mechanism.
2104 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// These mnemonics are emitted as a WAIT instruction followed by their
// "fn"-prefixed counterpart.
2106 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2107 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2108 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2109 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2111 Inst.setOpcode(X86::WAIT);
2113 if (!MatchingInlineAsm)
2114 Out.EmitInstruction(Inst);
// Replacement mnemonic for the second instruction of the pair.
2117 StringSwitch<const char*>(Op->getToken())
2118 .Case("finit", "fninit")
2119 .Case("fsave", "fnsave")
2120 .Case("fstcw", "fnstcw")
2121 .Case("fstcww", "fnstcw")
2122 .Case("fstenv", "fnstenv")
2123 .Case("fstsw", "fnstsw")
2124 .Case("fstsww", "fnstsw")
2125 .Case("fclex", "fnclex")
2127 assert(Repl && "Unknown wait-prefixed instruction");
2129 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2132 bool WasOriginallyInvalidOperand = false;
2135 // First, try a direct match.
2136 switch (MatchInstructionImpl(Operands, Inst,
2137 ErrorInfo, MatchingInlineAsm,
2138 isParsingIntelSyntax())) {
2141 // Some instructions need post-processing to, for example, tweak which
2142 // encoding is selected. Loop on it while changes happen so the
2143 // individual transformations can chain off each other.
2144 if (!MatchingInlineAsm)
2145 while (processInstruction(Inst, Operands))
2149 if (!MatchingInlineAsm)
2150 Out.EmitInstruction(Inst);
2151 Opcode = Inst.getOpcode();
2153 case Match_MissingFeature: {
2154 assert(ErrorInfo && "Unknown missing feature!");
2155 // Special case the error message for the very common case where only
2156 // a single subtarget feature is missing.
// ErrorInfo is treated as a bit mask of missing features; the name of each
// set bit is appended to the message.
2157 std::string Msg = "instruction requires:";
2159 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2160 if (ErrorInfo & Mask) {
2162 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2166 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2168 case Match_InvalidOperand:
2169 WasOriginallyInvalidOperand = true;
2171 case Match_MnemonicFail:
2175 // FIXME: Ideally, we would only attempt suffix matches for things which are
2176 // valid prefixes, and we could just infer the right unambiguous
2177 // type. However, that requires substantially more matcher support than the
2180 // Change the operand to point to a temporary token.
// Tmp is indexed at Base.size() below, so it presumably holds Base plus one
// placeholder character (its construction is not visible in this excerpt).
2181 StringRef Base = Op->getToken();
2182 SmallString<16> Tmp;
2185 Op->setTokenValue(Tmp.str());
2187 // If this instruction starts with an 'f', then it is a floating point stack
2188 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2189 // 80-bit floating point, which use the suffixes s,l,t respectively.
2191 // Otherwise, we assume that this may be an integer instruction, which comes
2192 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// For 'f' mnemonics the fourth suffix is an explicit NUL character.
2193 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2195 // Check for the various suffix matches.
// NOTE(review): the four calls below pass 4 arguments, whereas the direct
// match above passes 5 (ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
// isParsingIntelSyntax() may therefore land in the MatchingInlineAsm
// position - confirm against MatchInstructionImpl's signature.
2196 Tmp[Base.size()] = Suffixes[0];
2197 unsigned ErrorInfoIgnore;
2198 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2199 unsigned Match1, Match2, Match3, Match4;
2201 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2202 isParsingIntelSyntax());
2203 // If this returned as a missing feature failure, remember that.
2204 if (Match1 == Match_MissingFeature)
2205 ErrorInfoMissingFeature = ErrorInfoIgnore;
2206 Tmp[Base.size()] = Suffixes[1];
2207 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2208 isParsingIntelSyntax());
2209 // If this returned as a missing feature failure, remember that.
2210 if (Match2 == Match_MissingFeature)
2211 ErrorInfoMissingFeature = ErrorInfoIgnore;
2212 Tmp[Base.size()] = Suffixes[2];
2213 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2214 isParsingIntelSyntax());
2215 // If this returned as a missing feature failure, remember that.
2216 if (Match3 == Match_MissingFeature)
2217 ErrorInfoMissingFeature = ErrorInfoIgnore;
2218 Tmp[Base.size()] = Suffixes[3];
2219 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2220 isParsingIntelSyntax());
2221 // If this returned as a missing feature failure, remember that.
2222 if (Match4 == Match_MissingFeature)
2223 ErrorInfoMissingFeature = ErrorInfoIgnore;
2225 // Restore the old token.
2226 Op->setTokenValue(Base);
2228 // If exactly one matched, then we treat that as a successful match (and the
2229 // instruction will already have been filled in correctly, since the failing
2230 // matches won't have modified it).
2231 unsigned NumSuccessfulMatches =
2232 (Match1 == Match_Success) + (Match2 == Match_Success) +
2233 (Match3 == Match_Success) + (Match4 == Match_Success);
2234 if (NumSuccessfulMatches == 1) {
2236 if (!MatchingInlineAsm)
2237 Out.EmitInstruction(Inst);
2238 Opcode = Inst.getOpcode();
2242 // Otherwise, the match failed, try to produce a decent error message.
2244 // If we had multiple suffix matches, then identify this as an ambiguous
2246 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched, in order.
2248 unsigned NumMatches = 0;
2249 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2250 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2251 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2252 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2254 SmallString<126> Msg;
2255 raw_svector_ostream OS(Msg);
2256 OS << "ambiguous instructions require an explicit suffix (could be ";
2257 for (unsigned i = 0; i != NumMatches; ++i) {
2260 if (i + 1 == NumMatches)
2262 OS << "'" << Base << MatchChars[i] << "'";
2265 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2269 // Okay, we know that none of the variants matched successfully.
2271 // If all of the instructions reported an invalid mnemonic, then the original
2272 // mnemonic was invalid.
2273 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2274 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// If the direct match did not fail on an operand, the mnemonic itself is
// reported as invalid.
2275 if (!WasOriginallyInvalidOperand) {
2276 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2278 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2279 Ranges, MatchingInlineAsm);
2282 // Recover location info for the operand if we know which was the problem.
// ErrorInfo (from the direct match) is used as an index into Operands; ~0U
// means it does not identify an operand.
2283 if (ErrorInfo != ~0U) {
2284 if (ErrorInfo >= Operands.size())
2285 return Error(IDLoc, "too few operands for instruction",
2286 EmptyRanges, MatchingInlineAsm);
2288 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2289 if (Operand->getStartLoc().isValid()) {
2290 SMRange OperandRange = Operand->getLocRange();
2291 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2292 OperandRange, MatchingInlineAsm);
2296 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2300 // If one instruction matched with a missing feature, report this as a
2302 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2303 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2304 std::string Msg = "instruction requires:";
2306 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2307 if (ErrorInfoMissingFeature & Mask) {
2309 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2313 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2316 // If one instruction matched with an invalid operand, report this as an
2318 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2319 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2320 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2325 // If all of these were an outright failure, report it in a useless way.
2326 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2327 EmptyRanges, MatchingInlineAsm);
/// ParseDirective - Route a target-specific directive according to the
/// identifier text of \p DirectiveID:
///   - exactly ".word":       result of ParseDirectiveWord(2, <directive loc>)
///   - prefix ".code":        result of ParseDirectiveCode(IDVal, <directive loc>)
///   - prefix ".att_syntax":  assembler dialect is set to 0
///   - prefix ".intel_syntax": assembler dialect is set to 1, after which an
///     optional trailing "noprefix" token is looked for
/// NOTE(review): the return statements of the two syntax branches and the
/// fall-through for unrecognized directives are not visible here -- confirm
/// what is returned on those paths before relying on it.
2332 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2333 StringRef IDVal = DirectiveID.getIdentifier();
// Exact match; the literal 2 is forwarded as ParseDirectiveWord's Size
// argument.
2334 if (IDVal == ".word")
2335 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Prefix match: every ".code..." spelling is handed on, and
// ParseDirectiveCode then compares against the exact spellings it supports.
2336 else if (IDVal.startswith(".code"))
2337 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2338 else if (IDVal.startswith(".att_syntax")) {
2339 getParser().setAssemblerDialect(0);
2341 } else if (IDVal.startswith(".intel_syntax")) {
2342 getParser().setAssemblerDialect(1);
// If the statement does not end right after the directive, the current
// token's text is compared against "noprefix".
2343 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2344 if(Parser.getTok().getString() == "noprefix") {
2345 // FIXME : Handle noprefix
2355 /// ParseDirectiveWord
2356 /// ::= .word [ expression (, expression)* ]
///
/// Parses the operand expressions of the directive and emits each one to the
/// streamer with EmitValue(Value, Size).
/// \param Size  forwarded unchanged as the second argument of EmitValue.
/// \param L     location used for the "unexpected token in directive" error.
/// \returns the result of Error() when an expression is followed by a token
/// that is neither end-of-statement nor a comma.
/// NOTE(review): the loop over the comma-separated list and the remaining
/// return statements are not visible here -- confirm the success/failure
/// return values against the full definition.
2357 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list (directive immediately followed by end of statement)
// skips expression parsing entirely.
2358 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2360 const MCExpr *Value;
// A true result from parseExpression takes the branch guarded here;
// presumably that is the failure path -- TODO confirm.
2361 if (getParser().parseExpression(Value))
2364 getParser().getStreamer().EmitValue(Value, Size);
// After an expression, the next token is tested for end-of-statement first
// and for a comma second.
2366 if (getLexer().is(AsmToken::EndOfStatement))
2369 // FIXME: Improve diagnostic.
2370 if (getLexer().isNot(AsmToken::Comma))
2371 return Error(L, "unexpected token in directive");
2380 /// ParseDirectiveCode
2381 /// ::= .code32 | .code64
///
/// Handles the two mode directives by exact spelling of \p IDVal:
///   - ".code32": when is64BitMode() is true, MCAF_Code32 is emitted to the
///     streamer as an assembler flag.
///   - ".code64": when is64BitMode() is false, MCAF_Code64 is emitted to the
///     streamer as an assembler flag.
/// In both cases nothing is emitted if the parser is already in the requested
/// mode.
/// \param L  location used for the "unexpected directive" error.
/// NOTE(review): the caller dispatches on the ".code" prefix, so other
/// spellings can reach this function; the Error() call below is presumably
/// the fallback for them. The statements that accompany each flag emission
/// and the final return value are not visible here -- confirm against the
/// full definition.
2382 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2383 if (IDVal == ".code32") {
// Only act when currently in 64-bit mode.
2385 if (is64BitMode()) {
2387 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2389 } else if (IDVal == ".code64") {
// Only act when currently NOT in 64-bit mode.
2391 if (!is64BitMode()) {
2393 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// The offending directive text is appended to the diagnostic.
2396 return Error(L, "unexpected directive " + IDVal);
2402 // Force static initialization.
// Entry point with C linkage that constructs one
// RegisterMCAsmParser<X86AsmParser> object per X86 target: X for
// TheX86_32Target and Y for TheX86_64Target. The objects are locals that are
// never referenced again, so the registration presumably happens in the
// RegisterMCAsmParser constructor -- TODO confirm against TargetRegistry.h.
2403 extern "C" void LLVMInitializeX86AsmParser() {
2404 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2405 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2408 #define GET_REGISTER_MATCHER
2409 #define GET_MATCHER_IMPLEMENTATION
2410 #define GET_SUBTARGET_FEATURE_NAME
2411 #include "X86GenAsmMatcher.inc"