1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
41 static const char OpPrecedence[] = {
56 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
69 enum InfixCalculatorTok {
84 class InfixCalculator {
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
/// Pop the top entry off PostfixStack. Asserts that the stack is non-empty
/// and that the popped entry is tagged IC_IMM or IC_REGISTER.
/// NOTE(review): the return statement is not visible in this excerpt;
/// presumably the entry's int64_t value (Op.second) is returned -- confirm.
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
/// Append an operand to PostfixStack as an (Op, Val) pair.
/// \param Op  operand kind; asserted to be IC_IMM or IC_REGISTER.
/// \param Val value stored with the operand; defaults to 0.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discard the operator on top of InfixOperatorStack. No emptiness check is
/// made here before calling pop_back().
103 void popOperator() { InfixOperatorStack.pop_back(); }
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher precedence than the
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If we have an even parentheses count and we see a left parenthesis,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 if (PostfixStack.empty())
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
231 enum IntelExprState {
250 class IntelExprStateMachine {
251 IntelExprState State, PrevState;
252 unsigned BaseReg, IndexReg, TmpReg, Scale;
256 bool StopOnLBrac, AddImmPrefix;
258 InlineAsmIdentifierInfo Info;
260 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
261 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
262 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
263 AddImmPrefix(addimmprefix) { Info.clear(); }
265 unsigned getBaseReg() { return BaseReg; }
266 unsigned getIndexReg() { return IndexReg; }
267 unsigned getScale() { return Scale; }
268 const MCExpr *getSym() { return Sym; }
269 StringRef getSymName() { return SymName; }
/// Return the immediate: the stored Imm plus the value produced by
/// IC.execute(). Note that IC.execute() is invoked on every call.
270 int64_t getImm() { return Imm + IC.execute(); }
/// Return true when the current State is IES_RBRAC or IES_INTEGER, the two
/// states this predicate accepts as a valid end state.
271 bool isValidEndState() {
272 return State == IES_RBRAC || State == IES_INTEGER;
274 bool getStopOnLBrac() { return StopOnLBrac; }
275 bool getAddImmPrefix() { return AddImmPrefix; }
/// Return true when the current State is IES_ERROR.
276 bool hadError() { return State == IES_ERROR; }
278 InlineAsmIdentifierInfo &getIdentifierInfo() {
283 IntelExprState CurrState = State;
292 IC.pushOperator(IC_OR);
295 PrevState = CurrState;
298 IntelExprState CurrState = State;
307 IC.pushOperator(IC_AND);
310 PrevState = CurrState;
313 IntelExprState CurrState = State;
322 IC.pushOperator(IC_LSHIFT);
325 PrevState = CurrState;
328 IntelExprState CurrState = State;
337 IC.pushOperator(IC_RSHIFT);
340 PrevState = CurrState;
343 IntelExprState CurrState = State;
352 IC.pushOperator(IC_PLUS);
353 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
354 // If we already have a BaseReg, then assume this is the IndexReg with
359 assert (!IndexReg && "BaseReg/IndexReg already set!");
366 PrevState = CurrState;
369 IntelExprState CurrState = State;
384 // Only push the minus operator if it is not a unary operator.
385 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
386 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
387 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
388 IC.pushOperator(IC_MINUS);
389 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
390 // If we already have a BaseReg, then assume this is the IndexReg with
395 assert (!IndexReg && "BaseReg/IndexReg already set!");
402 PrevState = CurrState;
404 void onRegister(unsigned Reg) {
405 IntelExprState CurrState = State;
412 State = IES_REGISTER;
414 IC.pushOperand(IC_REGISTER);
417 // Index Register - Scale * Register
418 if (PrevState == IES_INTEGER) {
419 assert (!IndexReg && "IndexReg already set!");
420 State = IES_REGISTER;
422 // Get the scale and replace the 'Scale * Register' with '0'.
423 Scale = IC.popOperand();
424 IC.pushOperand(IC_IMM);
431 PrevState = CurrState;
433 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
443 SymName = SymRefName;
444 IC.pushOperand(IC_IMM);
448 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
449 IntelExprState CurrState = State;
464 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
465 // Index Register - Register * Scale
466 assert (!IndexReg && "IndexReg already set!");
469 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
470 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
473 // Get the scale and replace the 'Register * Scale' with '0'.
475 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
476 PrevState == IES_OR || PrevState == IES_AND ||
477 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
478 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
479 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
480 CurrState == IES_MINUS) {
481 // Unary minus. No need to pop the minus operand because it was never
483 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
485 IC.pushOperand(IC_IMM, TmpInt);
489 PrevState = CurrState;
501 State = IES_MULTIPLY;
502 IC.pushOperator(IC_MULTIPLY);
515 IC.pushOperator(IC_DIVIDE);
527 IC.pushOperator(IC_PLUS);
532 IntelExprState CurrState = State;
541 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
542 // If we already have a BaseReg, then assume this is the IndexReg with
547 assert (!IndexReg && "BaseReg/IndexReg already set!");
554 PrevState = CurrState;
557 IntelExprState CurrState = State;
571 // FIXME: We don't handle this type of unary minus, yet.
572 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
573 PrevState == IES_OR || PrevState == IES_AND ||
574 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
575 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
576 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
577 CurrState == IES_MINUS) {
582 IC.pushOperator(IC_LPAREN);
585 PrevState = CurrState;
597 IC.pushOperator(IC_RPAREN);
603 MCAsmParser &getParser() const { return Parser; }
605 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Report an error at location L.
/// \param L                 source location of the error.
/// \param Msg               diagnostic text.
/// \param Ranges            optional source ranges passed on to Parser.Error.
/// \param MatchingInlineAsm when true, nothing is forwarded to Parser and the
///                          function just returns true.
/// \return true when MatchingInlineAsm is set; otherwise whatever
///         Parser.Error(L, Msg, Ranges) returns.
607 bool Error(SMLoc L, const Twine &Msg,
608 ArrayRef<SMRange> Ranges = None,
609 bool MatchingInlineAsm = false) {
610 if (MatchingInlineAsm) return true;
611 return Parser.Error(L, Msg, Ranges);
/// Call Parser.eatToEndOfStatement() and then report the error through
/// Error() with the same arguments. The eat call is unconditional: it runs
/// even when MatchingInlineAsm is set.
/// \return the result of Error(L, Msg, Ranges, MatchingInlineAsm).
614 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
615 ArrayRef<SMRange> Ranges = None,
616 bool MatchingInlineAsm = false) {
617 Parser.eatToEndOfStatement();
618 return Error(L, Msg, Ranges, MatchingInlineAsm);
621 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
626 X86Operand *DefaultMemSIOperand(SMLoc Loc);
627 X86Operand *DefaultMemDIOperand(SMLoc Loc);
628 X86Operand *ParseOperand();
629 X86Operand *ParseATTOperand();
630 X86Operand *ParseIntelOperand();
631 X86Operand *ParseIntelOffsetOfOperator();
632 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
633 X86Operand *ParseIntelOperator(unsigned OpKind);
634 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
635 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
637 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
638 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
639 int64_t ImmDisp, unsigned Size);
640 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
641 InlineAsmIdentifierInfo &Info,
642 bool IsUnevaluatedOperand, SMLoc &End);
644 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
646 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
647 unsigned BaseReg, unsigned IndexReg,
648 unsigned Scale, SMLoc Start, SMLoc End,
649 unsigned Size, StringRef Identifier,
650 InlineAsmIdentifierInfo &Info);
652 bool ParseDirectiveWord(unsigned Size, SMLoc L);
653 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
655 bool processInstruction(MCInst &Inst,
656 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
658 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
659 /// instrumentation around Inst.
660 void EmitInstruction(MCInst &Inst,
661 SmallVectorImpl<MCParsedAsmOperand *> &Operands,
664 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
665 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
666 MCStreamer &Out, unsigned &ErrorInfo,
667 bool MatchingInlineAsm) override;
669 /// doSrcDstMatch - Returns true if operands are matching in their
670 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
671 /// the parsing mode (Intel vs. AT&T).
672 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
674 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
675 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
676 /// \return \c true if no parsing errors occurred, \c false otherwise.
677 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
678 const MCParsedAsmOperand &Op);
/// Return true when the X86::Mode64Bit bit is set in STI.getFeatureBits().
680 bool is64BitMode() const {
681 // FIXME: Can tablegen auto-generate this?
682 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
/// Return true when the X86::Mode32Bit bit is set in STI.getFeatureBits().
684 bool is32BitMode() const {
685 // FIXME: Can tablegen auto-generate this?
686 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
/// Return true when the X86::Mode16Bit bit is set in STI.getFeatureBits().
688 bool is16BitMode() const {
689 // FIXME: Can tablegen auto-generate this?
690 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switch the subtarget to the given mode bit and recompute the available
/// feature set used by the matcher.
/// \param mode the mode bit to end up with; the trailing assert requires that,
///             afterwards, the Mode64Bit/Mode32Bit/Mode16Bit bits of
///             STI.getFeatureBits() equal exactly this value.
/// NOTE(review): this relies on STI.ToggleFeature() flipping every bit passed
/// to it (old mode off, new mode on). If so, calling with the mode that is
/// already active would clear it and trip the assert -- confirm callers never
/// do that.
692 void SwitchMode(uint64_t mode) {
// Isolate whichever of the three mode bits is currently set.
693 uint64_t oldMode = STI.getFeatureBits() &
694 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
695 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
696 setAvailableFeatures(FB);
697 assert(mode == (STI.getFeatureBits() &
698 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
/// Return the parser's assembler dialect converted to bool, so any non-zero
/// dialect value is treated as Intel syntax.
701 bool isParsingIntelSyntax() {
702 return getParser().getAssemblerDialect();
705 /// @name Auto-generated Matcher Functions
708 #define GET_ASSEMBLER_HEADER
709 #include "X86GenAsmMatcher.inc"
714 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
715 const MCInstrInfo &mii,
716 const MCTargetOptions &Options)
717 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii), InstInfo(0) {
719 // Initialize the set of available features.
720 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
721 Instrumentation.reset(
722 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
725 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
728 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
729 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
731 bool ParseDirective(AsmToken DirectiveID) override;
733 } // end anonymous namespace
735 /// @name Auto-generated Match Functions
738 static unsigned MatchRegisterName(StringRef Name);
/// Validate a base/index register pair for a memory operand by comparing the
/// register classes of the two registers. Each failing check stores a
/// diagnostic string in ErrMsg.
/// NOTE(review): the return statements are not visible in this excerpt. The
/// call site in ParseIntelBracExpression reports ErrMsg when the result is
/// true, so true presumably means "invalid combination" -- confirm.
742 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
744 // If we have both a base register and an index register make sure they are
745 // both 64-bit or 32-bit registers.
746 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
747 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a GR16 or GR32 index (other than RIZ) is diagnosed.
748 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
749 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
750 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
751 IndexReg != X86::RIZ) {
752 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a GR16 or GR64 index (other than EIZ) is diagnosed.
755 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
756 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
757 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
758 IndexReg != X86::EIZ){
759 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: a GR32 or GR64 index is diagnosed first; then the pairing is
// checked (BX/BP must go with SI/DI, and SI/DI must go with BX/BP).
762 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
763 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
764 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
765 ErrMsg = "base register is 16-bit, but index register is not";
768 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
769 IndexReg != X86::SI && IndexReg != X86::DI) ||
770 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
771 IndexReg != X86::BX && IndexReg != X86::BP)) {
772 ErrMsg = "invalid 16-bit base/index register combination";
/// Check that the base registers of two memory operands belong to the same
/// general-purpose register class. Op2's base register is tested against
/// GR16, GR32 and GR64 in turn; for the first class that contains it, the
/// result is whether Op1's base register is in that same class.
780 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
782 // Return true and let a normal complaint about bogus operands happen.
783 if (!Op1.isMem() || !Op2.isMem())
786 // Actually these might be the other way round if Intel syntax is
787 // being used. It doesn't matter.
788 unsigned diReg = Op1.Mem.BaseReg;
789 unsigned siReg = Op2.Mem.BaseReg;
791 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
792 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
794 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
795 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
796 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
797 // Again, return true and let another error happen.
801 bool X86AsmParser::ParseRegister(unsigned &RegNo,
802 SMLoc &StartLoc, SMLoc &EndLoc) {
804 const AsmToken &PercentTok = Parser.getTok();
805 StartLoc = PercentTok.getLoc();
807 // If we encounter a %, ignore it. This code handles registers with and
808 // without the prefix, unprefixed registers can occur in cfi directives.
809 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
810 Parser.Lex(); // Eat percent token.
812 const AsmToken &Tok = Parser.getTok();
813 EndLoc = Tok.getEndLoc();
815 if (Tok.isNot(AsmToken::Identifier)) {
816 if (isParsingIntelSyntax()) return true;
817 return Error(StartLoc, "invalid register name",
818 SMRange(StartLoc, EndLoc));
821 RegNo = MatchRegisterName(Tok.getString());
823 // If the match failed, try the register name as lowercase.
825 RegNo = MatchRegisterName(Tok.getString().lower());
827 if (!is64BitMode()) {
828 // FIXME: This should be done using Requires<Not64BitMode> and
829 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
831 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
833 if (RegNo == X86::RIZ ||
834 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
835 X86II::isX86_64NonExtLowByteReg(RegNo) ||
836 X86II::isX86_64ExtendedReg(RegNo))
837 return Error(StartLoc, "register %"
838 + Tok.getString() + " is only available in 64-bit mode",
839 SMRange(StartLoc, EndLoc));
842 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
843 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
845 Parser.Lex(); // Eat 'st'
847 // Check to see if we have '(4)' after %st.
848 if (getLexer().isNot(AsmToken::LParen))
853 const AsmToken &IntTok = Parser.getTok();
854 if (IntTok.isNot(AsmToken::Integer))
855 return Error(IntTok.getLoc(), "expected stack index");
856 switch (IntTok.getIntVal()) {
857 case 0: RegNo = X86::ST0; break;
858 case 1: RegNo = X86::ST1; break;
859 case 2: RegNo = X86::ST2; break;
860 case 3: RegNo = X86::ST3; break;
861 case 4: RegNo = X86::ST4; break;
862 case 5: RegNo = X86::ST5; break;
863 case 6: RegNo = X86::ST6; break;
864 case 7: RegNo = X86::ST7; break;
865 default: return Error(IntTok.getLoc(), "invalid stack index");
868 if (getParser().Lex().isNot(AsmToken::RParen))
869 return Error(Parser.getTok().getLoc(), "expected ')'");
871 EndLoc = Parser.getTok().getEndLoc();
872 Parser.Lex(); // Eat ')'
876 EndLoc = Parser.getTok().getEndLoc();
878 // If this is "db[0-7]", match it as an alias
880 if (RegNo == 0 && Tok.getString().size() == 3 &&
881 Tok.getString().startswith("db")) {
882 switch (Tok.getString()[2]) {
883 case '0': RegNo = X86::DR0; break;
884 case '1': RegNo = X86::DR1; break;
885 case '2': RegNo = X86::DR2; break;
886 case '3': RegNo = X86::DR3; break;
887 case '4': RegNo = X86::DR4; break;
888 case '5': RegNo = X86::DR5; break;
889 case '6': RegNo = X86::DR6; break;
890 case '7': RegNo = X86::DR7; break;
894 EndLoc = Parser.getTok().getEndLoc();
895 Parser.Lex(); // Eat it.
901 if (isParsingIntelSyntax()) return true;
902 return Error(StartLoc, "invalid register name",
903 SMRange(StartLoc, EndLoc));
906 Parser.Lex(); // Eat identifier token.
/// Build a memory operand located at Loc with no segment register, a constant
/// zero displacement, no index register and scale 1. The mode-dependent
/// choice below is RSI in 64-bit mode, ESI in 32-bit mode, otherwise SI.
/// NOTE(review): the line that assigns this choice to `basereg` is not
/// visible in this excerpt -- confirm.
910 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
912 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
913 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
914 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
915 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Build a memory operand located at Loc with no segment register, a constant
/// zero displacement, no index register and scale 1. The mode-dependent
/// choice below is RDI in 64-bit mode, EDI in 32-bit mode, otherwise DI.
/// NOTE(review): the line that assigns this choice to `basereg` is not
/// visible in this excerpt -- confirm.
918 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
920 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
921 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
922 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
923 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Parse one operand, dispatching on the active syntax: ParseIntelOperand()
/// when isParsingIntelSyntax() is true, ParseATTOperand() otherwise.
926 X86Operand *X86AsmParser::ParseOperand() {
927 if (isParsingIntelSyntax())
928 return ParseIntelOperand();
929 return ParseATTOperand();
932 /// getIntelMemOperandSize - Return intel memory operand size.
933 static unsigned getIntelMemOperandSize(StringRef OpStr) {
934 unsigned Size = StringSwitch<unsigned>(OpStr)
935 .Cases("BYTE", "byte", 8)
936 .Cases("WORD", "word", 16)
937 .Cases("DWORD", "dword", 32)
938 .Cases("QWORD", "qword", 64)
939 .Cases("XWORD", "xword", 80)
940 .Cases("XMMWORD", "xmmword", 128)
941 .Cases("YMMWORD", "ymmword", 256)
942 .Cases("ZMMWORD", "zmmword", 512)
943 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
949 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
950 unsigned BaseReg, unsigned IndexReg,
951 unsigned Scale, SMLoc Start, SMLoc End,
952 unsigned Size, StringRef Identifier,
953 InlineAsmIdentifierInfo &Info){
954 // If this is not a VarDecl then assume it is a FuncDecl or some other label
955 // reference. We need an 'r' constraint here, so we need to create a register
956 // operand to ensure proper matching. Just pick a GPR based on the size of
958 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
960 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
961 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
962 SMLoc(), Identifier, Info.OpDecl);
965 // We either have a direct symbol reference, or an offset from a symbol. The
966 // parser always puts the symbol on the LHS, so look there for size
967 // calculation purposes.
968 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
970 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
973 Size = Info.Type * 8; // Size is in terms of bits in this context.
975 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
980 // When parsing inline assembly we set the base register to a non-zero value
981 // if we don't know the actual value at this time. This is necessary to
982 // get the matching correct in some cases.
983 BaseReg = BaseReg ? BaseReg : 1;
984 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
985 End, Size, Identifier, Info.OpDecl);
989 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
990 StringRef SymName, int64_t ImmDisp,
991 int64_t FinalImmDisp, SMLoc &BracLoc,
992 SMLoc &StartInBrac, SMLoc &End) {
993 // Remove the '[' and ']' from the IR string.
994 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
995 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
997 // If ImmDisp is non-zero, then we parsed a displacement before the
998 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]).
999 // If ImmDisp doesn't match the displacement computed by the state machine
1000 // then we have an additional displacement in the bracketed expression.
1001 if (ImmDisp != FinalImmDisp) {
1003 // We have an immediate displacement before the bracketed expression.
1004 // Adjust this to match the final immediate displacement.
1006 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1007 E = AsmRewrites->end(); I != E; ++I) {
1008 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1010 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1011 assert (!Found && "ImmDisp already rewritten.");
1012 (*I).Kind = AOK_Imm;
1013 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1014 (*I).Val = FinalImmDisp;
1019 assert (Found && "Unable to rewrite ImmDisp.");
1022 // We have a symbolic and an immediate displacement, but no displacement
1023 // before the bracketed expression. Put the immediate displacement
1024 // before the bracketed expression.
1025 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1028 // Remove all the ImmPrefix rewrites within the brackets.
1029 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1030 E = AsmRewrites->end(); I != E; ++I) {
1031 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1033 if ((*I).Kind == AOK_ImmPrefix)
1034 (*I).Kind = AOK_Delete;
1036 const char *SymLocPtr = SymName.data();
1037 // Skip everything before the symbol.
1038 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1039 assert(Len > 0 && "Expected a non-negative length.");
1040 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1042 // Skip everything after the symbol.
1043 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1044 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1045 assert(Len > 0 && "Expected a non-negative length.");
1046 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1050 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1051 const AsmToken &Tok = Parser.getTok();
1055 bool UpdateLocLex = true;
1057 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1058 // identifier. Don't try to parse it as a register.
1059 if (Tok.getString().startswith("."))
1062 // If we're parsing an immediate expression, we don't expect a '['.
1063 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1066 switch (getLexer().getKind()) {
1068 if (SM.isValidEndState()) {
1072 return Error(Tok.getLoc(), "unknown token in expression");
1074 case AsmToken::EndOfStatement: {
1078 case AsmToken::Identifier: {
1079 // This could be a register or a symbolic displacement.
1082 SMLoc IdentLoc = Tok.getLoc();
1083 StringRef Identifier = Tok.getString();
1084 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1085 SM.onRegister(TmpReg);
1086 UpdateLocLex = false;
1089 if (!isParsingInlineAsm()) {
1090 if (getParser().parsePrimaryExpr(Val, End))
1091 return Error(Tok.getLoc(), "Unexpected identifier!");
1093 // This is a dot operator, not an adjacent identifier.
1094 if (Identifier.find('.') != StringRef::npos) {
1097 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1098 if (ParseIntelIdentifier(Val, Identifier, Info,
1099 /*Unevaluated=*/false, End))
1103 SM.onIdentifierExpr(Val, Identifier);
1104 UpdateLocLex = false;
1107 return Error(Tok.getLoc(), "Unexpected identifier!");
1109 case AsmToken::Integer: {
1111 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1112 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1114 // Look for 'b' or 'f' following an Integer as a directional label
1115 SMLoc Loc = getTok().getLoc();
1116 int64_t IntVal = getTok().getIntVal();
1117 End = consumeToken();
1118 UpdateLocLex = false;
1119 if (getLexer().getKind() == AsmToken::Identifier) {
1120 StringRef IDVal = getTok().getString();
1121 if (IDVal == "f" || IDVal == "b") {
1123 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1124 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1126 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1127 if (IDVal == "b" && Sym->isUndefined())
1128 return Error(Loc, "invalid reference to undefined symbol");
1129 StringRef Identifier = Sym->getName();
1130 SM.onIdentifierExpr(Val, Identifier);
1131 End = consumeToken();
1133 if (SM.onInteger(IntVal, ErrMsg))
1134 return Error(Loc, ErrMsg);
1137 if (SM.onInteger(IntVal, ErrMsg))
1138 return Error(Loc, ErrMsg);
1142 case AsmToken::Plus: SM.onPlus(); break;
1143 case AsmToken::Minus: SM.onMinus(); break;
1144 case AsmToken::Star: SM.onStar(); break;
1145 case AsmToken::Slash: SM.onDivide(); break;
1146 case AsmToken::Pipe: SM.onOr(); break;
1147 case AsmToken::Amp: SM.onAnd(); break;
1148 case AsmToken::LessLess:
1149 SM.onLShift(); break;
1150 case AsmToken::GreaterGreater:
1151 SM.onRShift(); break;
1152 case AsmToken::LBrac: SM.onLBrac(); break;
1153 case AsmToken::RBrac: SM.onRBrac(); break;
1154 case AsmToken::LParen: SM.onLParen(); break;
1155 case AsmToken::RParen: SM.onRParen(); break;
1158 return Error(Tok.getLoc(), "unknown token in expression");
1160 if (!Done && UpdateLocLex)
1161 End = consumeToken();
1166 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1169 const AsmToken &Tok = Parser.getTok();
1170 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1171 if (getLexer().isNot(AsmToken::LBrac))
1172 return ErrorOperand(BracLoc, "Expected '[' token!");
1173 Parser.Lex(); // Eat '['
1175 SMLoc StartInBrac = Tok.getLoc();
1176 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1177 // may have already parsed an immediate displacement before the bracketed
1179 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1180 if (ParseIntelExpression(SM, End))
1183 const MCExpr *Disp = 0;
1184 if (const MCExpr *Sym = SM.getSym()) {
1185 // A symbolic displacement.
1187 if (isParsingInlineAsm())
1188 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1189 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1193 if (SM.getImm() || !Disp) {
1194 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1196 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1198 Disp = Imm; // An immediate displacement only.
1201 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1202 // will in fact do a global lookup of the field name inside all global typedefs,
1203 // but we don't emulate that.
1204 if (Tok.getString().find('.') != StringRef::npos) {
1205 const MCExpr *NewDisp;
1206 if (ParseIntelDotOperator(Disp, NewDisp))
1209 End = Tok.getEndLoc();
1210 Parser.Lex(); // Eat the field.
1214 int BaseReg = SM.getBaseReg();
1215 int IndexReg = SM.getIndexReg();
1216 int Scale = SM.getScale();
1217 if (!isParsingInlineAsm()) {
1219 if (!BaseReg && !IndexReg) {
1221 return X86Operand::CreateMem(Disp, Start, End, Size);
1223 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1226 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1227 Error(StartInBrac, ErrMsg);
1230 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1234 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1235 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1236 End, Size, SM.getSymName(), Info);
1239 // Inline assembly may use variable names with namespace alias qualifiers.
//
// Resolve an identifier appearing in inline assembly by handing it to the
// frontend (SemaCallback) and build a symbol reference expression for it.
//
// \param Val [out] Receives the MCSymbolRefExpr created for the identifier.
// \param Identifier [in,out] On entry, data() marks where the identifier
//        text begins; on exit it is overwritten with LineBuf.
// \param Info [out] Handed to LookupInlineAsmIdentifier.
// \param IsUnevaluatedOperand Forwarded unchanged to the frontend lookup.
// \param End [out] Set to the end location of the token(s) examined.
// NOTE(review): presumably returns true on error like the other parse
// helpers in this file -- confirm.
1240 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1241 StringRef &Identifier,
1242 InlineAsmIdentifierInfo &Info,
1243 bool IsUnevaluatedOperand, SMLoc &End) {
1244 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer, not from Identifier
// itself, so its length does not come from Identifier.size().
// NOTE(review): presumably the frontend callback trims LineBuf to the part
// it recognized; LineBuf.size() is used below as that extent -- confirm.
1247 StringRef LineBuf(Identifier.data());
1248 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1250 const AsmToken &Tok = Parser.getTok();
1252 // Advance the token stream until the end of the current token is
1253 // after the end of what the frontend claimed.
1254 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1256 End = Tok.getEndLoc();
// The frontend must stop exactly on a token boundary; stopping inside a
// token trips the assertion below.
1259 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1260 if (End.getPointer() == EndPtr) break;
1263 // Create the symbol reference.
1264 Identifier = LineBuf;
1265 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1266 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1267 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1271 /// \brief Parse intel style segment override.
///
/// Handles what follows a segment register: the ':' itself, then one of
///   - an integer, optionally followed by a bracketed expression,
///   - a bracketed expression,
///   - a primary expression (plain assembly) or an identifier (inline asm).
/// \param SegReg The already-parsed segment register; must be non-zero.
/// \returns The parsed memory operand, or the result of ErrorOperand() when
/// the ':' is missing or the expression cannot be parsed.
1272 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1275 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1276 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1277 if (Tok.isNot(AsmToken::Colon))
1278 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1279 Parser.Lex(); // Eat ':'
1281 int64_t ImmDisp = 0;
1282 if (getLexer().is(AsmToken::Integer)) {
1283 ImmDisp = Tok.getIntVal();
1284 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record a rewrite at the integer's location so the
// immediate gets a prefix in the rewritten string.
1286 if (isParsingInlineAsm())
1287 InstInfo->AsmRewrites->push_back(
1288 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1290 if (getLexer().isNot(AsmToken::LBrac)) {
1291 // An immediate following a 'segment register', 'colon' token sequence can
1292 // be followed by a bracketed expression. If it isn't we know we have our
1293 // final segment override.
1294 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1295 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1296 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed form: ImmDisp (0 if no integer was seen) is passed along as the
// leading displacement.
1301 if (getLexer().is(AsmToken::LBrac))
1302 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither return below forwards SegReg (the first uses the
// CreateMem form without a segment argument, the second passes /*SegReg=*/0)
// -- confirm whether dropping the override here is intended.
1306 if (!isParsingInlineAsm()) {
1307 if (getParser().parsePrimaryExpr(Val, End))
1308 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1310 return X86Operand::CreateMem(Val, Start, End, Size);
1313 InlineAsmIdentifierInfo Info;
1314 StringRef Identifier = Tok.getString();
1315 if (ParseIntelIdentifier(Val, Identifier, Info,
1316 /*Unevaluated=*/false, End))
1318 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1319 /*Scale=*/1, Start, End, Size, Identifier, Info);
1322 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Three shapes are handled:
///   - "ImmDisp [ ... ]": delegated to ParseIntelBracExpression,
///   - a primary expression (plain assembly), returned as a memory operand,
///   - an inline-asm identifier, optionally followed by "[ ImmDisp ]".
/// \param ImmDisp Displacement already parsed before the operand; it must be
/// zero unless a '[' follows (see the assert below).
1323 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1325 const AsmToken &Tok = Parser.getTok();
1328 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1329 if (getLexer().is(AsmToken::LBrac))
1330 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1331 assert(ImmDisp == 0);
1334 if (!isParsingInlineAsm()) {
1335 if (getParser().parsePrimaryExpr(Val, End))
1336 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1338 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1341 InlineAsmIdentifierInfo Info;
1342 StringRef Identifier = Tok.getString();
1343 if (ParseIntelIdentifier(Val, Identifier, Info,
1344 /*Unevaluated=*/false, End))
1347 if (!getLexer().is(AsmToken::LBrac))
1348 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1349 /*Scale=*/1, Start, End, Size, Identifier, Info);
1351 Parser.Lex(); // Eat '['
1353 // Parse Identifier [ ImmDisp ]
1354 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1355 /*AddImmPrefix=*/false);
1356 if (ParseIntelExpression(SM, End))
// The bracketed part after a variable may contribute only a constant: a
// second symbol, a base register or an index register is diagnosed below.
1360 Error(Start, "cannot use more than one symbol in memory operand");
1363 if (SM.getBaseReg()) {
1364 Error(Start, "cannot use base register with variable reference");
1367 if (SM.getIndexReg()) {
1368 Error(Start, "cannot use index register with variable reference");
1372 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1373 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1374 // we're pointing to a local variable in memory, so the base register is
1375 // really the frame or stack pointer.
1376 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1377 /*Scale=*/1, Start, End, Size, Identifier,
1381 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (either ".<integer>" or, in
/// inline asm, a "Base.Member" field reference) to a constant displacement.
/// \param Disp The displacement so far; only MCConstantExpr is supported.
/// \param NewDisp [out] Receives a constant expression holding the sum.
/// \returns The result of Error() on the failure paths below.
/// NOTE(review): presumably returns false on success -- confirm.
1382 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1383 const MCExpr *&NewDisp) {
1384 const AsmToken &Tok = Parser.getTok();
1385 int64_t OrigDispVal, DotDispVal;
1387 // FIXME: Handle non-constant expressions.
1388 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1389 OrigDispVal = OrigDisp->getValue();
1391 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1393 // Drop the optional '.'.
1394 StringRef DotDispStr = Tok.getString();
1395 if (DotDispStr.startswith("."))
1396 DotDispStr = DotDispStr.drop_front(1);
1398 // .Imm gets lexed as a real.
1399 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here -- confirm
// that a malformed number cannot reach this point.
1401 DotDispStr.getAsInteger(10, DotDisp);
1402 DotDispVal = DotDisp.getZExtValue();
1403 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split at the first '.' into the two names handed to the frontend's
// field lookup.
1405 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1406 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1408 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1409 DotDispVal = DotDisp;
1411 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm field references, record a rewrite covering the field text
// (without the leading '.').
1413 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1414 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1415 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite --
// confirm negative or very large offsets cannot occur here.
1416 unsigned Val = OrigDispVal + DotDispVal;
1417 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1421 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1425 /// Parse the 'offset' operator. This operator is used to specify the
1426 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the identifier that follows through
/// ParseIntelIdentifier, records a rewrite that skips the operator text, and
/// returns a register operand created with GetAddress set.
1427 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1428 const AsmToken &Tok = Parser.getTok();
1429 SMLoc OffsetOfLoc = Tok.getLoc();
1430 Parser.Lex(); // Eat offset.
// Tok is a reference obtained from Parser.getTok(); it is read again below
// after Lex() to get the identifier's location and text.
1433 InlineAsmIdentifierInfo Info;
1434 SMLoc Start = Tok.getLoc(), End;
1435 StringRef Identifier = Tok.getString();
1436 if (ParseIntelIdentifier(Val, Identifier, Info,
1437 /*Unevaluated=*/false, End))
1440 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one separator
// character -- confirm.
1441 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1443 // The offset operator will have an 'r' constraint, thus we need to create
1444 // register operand to ensure proper matching. Just pick a GPR based on
1445 // the size of a pointer.
1447 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1448 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1449 OffsetOfLoc, Identifier, Info.OpDecl);
// Selector passed to ParseIntelOperator; that function switches on
// IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1452 enum IntelOperatorKind {
1458 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1459 /// returns the number of elements in an array. It returns the value 1 for
1460 /// non-array variables. The SIZE operator returns the size of a C or C++
1461 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1462 /// TYPE operator returns the size of a C or C++ type or variable. If the
1463 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
/// reaches llvm_unreachable.
/// \returns An immediate operand holding the value taken from the
/// identifier's InlineAsmIdentifierInfo, or the result of ErrorOperand().
1464 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1465 const AsmToken &Tok = Parser.getTok();
1466 SMLoc TypeLoc = Tok.getLoc();
1467 Parser.Lex(); // Eat operator.
1469 const MCExpr *Val = 0;
1470 InlineAsmIdentifierInfo Info;
1471 SMLoc Start = Tok.getLoc(), End;
1472 StringRef Identifier = Tok.getString();
// The lookup is requested as an unevaluated operand, unlike the 'offset'
// operator and plain memory references.
1473 if (ParseIntelIdentifier(Val, Identifier, Info,
1474 /*Unevaluated=*/true, End))
1478 return ErrorOperand(Start, "unable to lookup expression");
1482 default: llvm_unreachable("Unexpected operand kind!");
1483 case IOK_LENGTH: CVal = Info.Length; break;
1484 case IOK_SIZE: CVal = Info.Size; break;
1485 case IOK_TYPE: CVal = Info.Type; break;
1488 // Rewrite the type operator and the C or C++ type or variable in terms of an
1489 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of the identifier.
1490 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1491 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1493 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1494 return X86Operand::CreateImm(Imm, Start, End);
// Parse one operand in Intel syntax. In order, this tries:
//   1. (inline asm only) the offset / length / size / type operators,
//   2. an optional operand-size keyword followed by PTR/ptr,
//   3. an immediate expression, optionally followed by a bracketed memory
//      expression,
//   4. a register, optionally starting a segment override,
//   5. otherwise a memory operand.
1497 X86Operand *X86AsmParser::ParseIntelOperand() {
1498 const AsmToken &Tok = Parser.getTok();
1501 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1502 if (isParsingInlineAsm()) {
1503 StringRef AsmTokStr = Tok.getString();
1504 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1505 return ParseIntelOffsetOfOperator();
1506 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1507 return ParseIntelOperator(IOK_LENGTH);
1508 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1509 return ParseIntelOperator(IOK_SIZE);
1510 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1511 return ParseIntelOperator(IOK_TYPE);
// NOTE(review): presumably Size is zero when the token is not a size
// keyword, and the PTR handling below is guarded on it -- confirm.
1514 unsigned Size = getIntelMemOperandSize(Tok.getString());
1516 Parser.Lex(); // Eat operand size (e.g., byte, word).
1517 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1518 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1519 Parser.Lex(); // Eat ptr.
1521 Start = Tok.getLoc();
// Immediate expressions start with an integer, a minus sign or '('.
1524 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1525 getLexer().is(AsmToken::LParen)) {
// StartTok is a copy (not a reference) so the first token's text is still
// available after the expression has been lexed.
1526 AsmToken StartTok = Tok;
1527 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1528 /*AddImmPrefix=*/false);
1529 if (ParseIntelExpression(SM, End))
1532 int64_t Imm = SM.getImm();
1533 if (isParsingInlineAsm()) {
// Len is the distance from the expression start to the current token; if it
// equals the first token's length the expression was a single token.
1534 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1535 if (StartTok.getString().size() == Len)
1536 // Just add a prefix if this wasn't a complex immediate expression.
1537 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1539 // Otherwise, rewrite the complex expression as a single immediate.
1540 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1543 if (getLexer().isNot(AsmToken::LBrac)) {
1544 // If a directional label (ie. 1f or 2b) was parsed above from
1545 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1546 // the MCExpr with the directional local symbol and this is a
1547 // memory operand not an immediate operand.
1549 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1551 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1552 return X86Operand::CreateImm(ImmExpr, Start, End);
1555 // Only positive immediates are valid.
1557 return ErrorOperand(Start, "expected a positive immediate displacement "
1558 "before bracketed expr.");
1560 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1561 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister's result is negated here, so this branch is taken when it
// returns false.
1566 if (!ParseRegister(RegNo, Start, End)) {
1567 // If this is a segment register followed by a ':', then this is the start
1568 // of a segment override, otherwise this is a normal register reference.
1569 if (getLexer().isNot(AsmToken::Colon))
1570 return X86Operand::CreateReg(RegNo, Start, End);
1572 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1576 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Parse one operand in AT&T syntax, dispatching on the current token kind:
//   - '%'  : a register, or "segreg:" followed by a memory operand,
//   - '$'  : an immediate expression,
//   - the remaining case parses a memory operand with no segment register.
// Returns 0 when register parsing fails.
1579 X86Operand *X86AsmParser::ParseATTOperand() {
1580 switch (getLexer().getKind()) {
1582 // Parse a memory operand with no segment register.
1583 return ParseMemOperand(0, Parser.getTok().getLoc());
1584 case AsmToken::Percent: {
1585 // Read the register.
1588 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1589 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1590 Error(Start, "%eiz and %riz can only be used as index registers",
1591 SMRange(Start, End));
1595 // If this is a segment register followed by a ':', then this is the start
1596 // of a memory reference, otherwise this is a normal register reference.
1597 if (getLexer().isNot(AsmToken::Colon))
1598 return X86Operand::CreateReg(RegNo, Start, End);
1600 getParser().Lex(); // Eat the colon.
1601 return ParseMemOperand(RegNo, Start);
1603 case AsmToken::Dollar: {
1604 // $42 -> immediate.
1605 SMLoc Start = Parser.getTok().getLoc(), End;
1608 if (getParser().parseExpression(Val, End))
1610 return X86Operand::CreateImm(Val, Start, End);
// Parse the optional AVX-512 decorations that may follow an operand and
// append the corresponding tokens/operands to Operands:
//   - memory broadcast  {1to8} / {1to16}
//   - mask register     {%k<NUM>}, optionally followed by
//   - zeroing marker    {z}
// Nothing is parsed unless the subtarget has FeatureAVX512 and the next token
// is '{'.
// Error paths return !ErrorAndEatStatement(...).
// NOTE(review): presumably ErrorAndEatStatement returns true, making false
// the failure result of this function -- confirm.
// NOTE(review): the parameter Op is shadowed by the local X86Operand *Op
// declared in the mask-register branch below.
1616 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1617 const MCParsedAsmOperand &Op) {
1618 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1619 if (getLexer().is(AsmToken::LCurly)) {
1620 // Eat "{" and mark the current place.
1621 const SMLoc consumedToken = consumeToken();
1622 // Distinguish {1to<NUM>} from {%k<NUM>}.
1623 if(getLexer().is(AsmToken::Integer)) {
1624 // Parse memory broadcasting ({1to<NUM>}).
1625 if (getLexer().getTok().getIntVal() != 1)
1626 return !ErrorAndEatStatement(getLexer().getLoc(),
1627 "Expected 1to<NUM> at this point");
1628 Parser.Lex(); // Eat "1" of 1to8
1629 if (!getLexer().is(AsmToken::Identifier) ||
1630 !getLexer().getTok().getIdentifier().startswith("to"))
1631 return !ErrorAndEatStatement(getLexer().getLoc(),
1632 "Expected 1to<NUM> at this point");
1633 // Recognize only reasonable suffixes.
1634 const char *BroadcastPrimitive =
1635 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1636 .Case("to8", "{1to8}")
1637 .Case("to16", "{1to16}")
1639 if (!BroadcastPrimitive)
1640 return !ErrorAndEatStatement(getLexer().getLoc(),
1641 "Invalid memory broadcast primitive.");
1642 Parser.Lex(); // Eat "toN" of 1toN
1643 if (!getLexer().is(AsmToken::RCurly))
1644 return !ErrorAndEatStatement(getLexer().getLoc(),
1645 "Expected } at this point");
1646 Parser.Lex(); // Eat "}"
// The whole "{1toN}" decoration is pushed as a single token.
1647 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1649 // No AVX512 specific primitives can pass
1650 // after memory broadcasting, so return.
1653 // Parse mask register {%k1}
// Here "{", the register operand and "}" are pushed as three separate
// entries.
1654 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1655 if (X86Operand *Op = ParseOperand()) {
1656 Operands.push_back(Op);
1657 if (!getLexer().is(AsmToken::RCurly))
1658 return !ErrorAndEatStatement(getLexer().getLoc(),
1659 "Expected } at this point");
1660 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1662 // Parse "zeroing non-masked" semantic {z}
1663 if (getLexer().is(AsmToken::LCurly)) {
// The "{z}" token is pushed as soon as '{' is seen, before 'z' and '}' are
// verified below.
1664 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1665 if (!getLexer().is(AsmToken::Identifier) ||
1666 getLexer().getTok().getIdentifier() != "z")
1667 return !ErrorAndEatStatement(getLexer().getLoc(),
1668 "Expected z at this point");
1669 Parser.Lex(); // Eat the z
1670 if (!getLexer().is(AsmToken::RCurly))
1671 return !ErrorAndEatStatement(getLexer().getLoc(),
1672 "Expected } at this point");
1673 Parser.Lex(); // Eat the }
1682 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1683 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-consumed "%seg:" prefix, or
/// 0 when there is none.
/// \param MemStart Location where the operand began.
/// \returns The memory operand, or 0 on the failure paths visible below
/// (failed expression or register parsing).
1684 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1686 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1687 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1688 // only way to do this without lookahead is to eat the '(' and see what is
// Disp defaults to the constant 0 when no displacement is written.
1690 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1691 if (getLexer().isNot(AsmToken::LParen)) {
1693 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1695 // After parsing the base expression we could either have a parenthesized
1696 // memory address or not. If not, return now. If so, eat the (.
1697 if (getLexer().isNot(AsmToken::LParen)) {
1698 // Unless we have a segment register, treat this as an immediate.
1700 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1701 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1707 // Okay, we have a '('. We don't know if this is an expression or not,
1708 // so we have to eat the ( to see beyond it.
1709 SMLoc LParenLoc = Parser.getTok().getLoc();
1710 Parser.Lex(); // Eat the '('.
1712 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1713 // Nothing to do here, fall into the code below with the '(' part of the
1714 // memory operand consumed.
1718 // It must be a parenthesized expression, parse it now.
1719 if (getParser().parseParenExpression(Disp, ExprEnd))
1722 // After parsing the base expression we could either have a parenthesized
1723 // memory address or not. If not, return now. If so, eat the (.
1724 if (getLexer().isNot(AsmToken::LParen)) {
1725 // Unless we have a segment register, treat this as an immediate.
1727 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1728 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1736 // If we reached here, then we just ate the ( of the memory operand. Process
1737 // the rest of the memory operand.
1738 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1739 SMLoc IndexLoc, BaseLoc;
1741 if (getLexer().is(AsmToken::Percent)) {
1742 SMLoc StartLoc, EndLoc;
1743 BaseLoc = Parser.getTok().getLoc();
1744 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1745 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1746 Error(StartLoc, "eiz and riz can only be used as index registers",
1747 SMRange(StartLoc, EndLoc));
1752 if (getLexer().is(AsmToken::Comma)) {
1753 Parser.Lex(); // Eat the comma.
1754 IndexLoc = Parser.getTok().getLoc();
1756 // Following the comma we should have either an index register, or a scale
1757 // value. We don't support the latter form, but we want to parse it
1760 // Note that even though it would be completely consistent to support syntax
1761 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1762 if (getLexer().is(AsmToken::Percent)) {
1764 if (ParseRegister(IndexReg, L, L)) return 0;
1766 if (getLexer().isNot(AsmToken::RParen)) {
1767 // Parse the scale amount:
1768 // ::= ',' [scale-expression]
1769 if (getLexer().isNot(AsmToken::Comma)) {
1770 Error(Parser.getTok().getLoc(),
1771 "expected comma in scale expression");
1774 Parser.Lex(); // Eat the comma.
1776 if (getLexer().isNot(AsmToken::RParen)) {
1777 SMLoc Loc = Parser.getTok().getLoc();
1780 if (getParser().parseAbsoluteExpression(ScaleVal)){
1781 Error(Loc, "expected scale expression");
1785 // Validate the scale amount.
1786 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1788 Error(Loc, "scale factor in 16-bit address must be 1");
1791 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1792 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1795 Scale = (unsigned)ScaleVal;
1798 } else if (getLexer().isNot(AsmToken::RParen)) {
1799 // A scale amount without an index is ignored.
1801 SMLoc Loc = Parser.getTok().getLoc();
1804 if (getParser().parseAbsoluteExpression(Value))
1808 Warning(Loc, "scale factor without index register is ignored");
1813 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1814 if (getLexer().isNot(AsmToken::RParen)) {
1815 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1818 SMLoc MemEnd = Parser.getTok().getEndLoc();
1819 Parser.Lex(); // Eat the ')'.
1821 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1822 // and then only in non-64-bit modes. Except for DX, which is a special case
1823 // because an unofficial form of in/out instructions uses it.
1824 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1825 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1826 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1827 BaseReg != X86::DX) {
1828 Error(BaseLoc, "invalid 16-bit base register");
1832 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1833 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final cross-check of the base/index pair; the diagnostic is reported at
// the base register's location.
1838 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1839 Error(BaseLoc, ErrMsg);
1843 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement.
//
// The mnemonic is first canonicalized (set<cc>b and cmp<cc>{ss,sd,ps,pd}
// spellings), then pushed as a token, then the operands are parsed. After
// that, a series of mnemonic-specific fixups rewrites the operand list: the
// in/out "(%dx)" forms, default operands for the string instructions,
// shift/rotate by 1, and "int $3".
//
// \param Name The mnemonic as written in the source.
// \param NameLoc Location of the mnemonic; also used as the location of every
// operand synthesized here.
// \param Operands [out] Receives the mnemonic token followed by the operands.
// NOTE(review): presumably returns true on error and false on success, as the
// visible "return Error(...)" paths suggest -- confirm.
1848 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1849 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1851 StringRef PatchedName = Name;
1853 // FIXME: Hack to recognize setneb as setne.
1854 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1855 PatchedName != "setb" && PatchedName != "setnb")
1856 PatchedName = PatchedName.substr(0, Name.size()-1);
1858 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1859 const MCExpr *ExtraImmOp = 0;
1860 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1861 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1862 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1863 bool IsVCMP = PatchedName[0] == 'v';
// The condition-code text sits between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1864 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1865 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1866 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1870 .Case("unord", 0x03)
1875 /* AVX only from here */
1876 .Case("eq_uq", 0x08)
1879 .Case("false", 0x0B)
1880 .Case("neq_oq", 0x0C)
1884 .Case("eq_os", 0x10)
1885 .Case("lt_oq", 0x11)
1886 .Case("le_oq", 0x12)
1887 .Case("unord_s", 0x13)
1888 .Case("neq_us", 0x14)
1889 .Case("nlt_uq", 0x15)
1890 .Case("nle_uq", 0x16)
1891 .Case("ord_s", 0x17)
1892 .Case("eq_us", 0x18)
1893 .Case("nge_uq", 0x19)
1894 .Case("ngt_uq", 0x1A)
1895 .Case("false_os", 0x1B)
1896 .Case("neq_os", 0x1C)
1897 .Case("ge_oq", 0x1D)
1898 .Case("gt_oq", 0x1E)
1899 .Case("true_us", 0x1F)
// ~0U is treated as "no match". Codes of 8 and above are accepted only for
// the v-prefixed mnemonics.
1901 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1902 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1903 getParser().getContext());
1904 if (PatchedName.endswith("ss")) {
1905 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1906 } else if (PatchedName.endswith("sd")) {
1907 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1908 } else if (PatchedName.endswith("ps")) {
1909 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1911 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1912 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1917 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// The comparison-code immediate goes first in AT&T syntax and last in Intel
// syntax (see the matching push after operand parsing).
1919 if (ExtraImmOp && !isParsingIntelSyntax())
1920 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1922 // Determine whether this is an instruction prefix.
1924 Name == "lock" || Name == "rep" ||
1925 Name == "repe" || Name == "repz" ||
1926 Name == "repne" || Name == "repnz" ||
1927 Name == "rex64" || Name == "data16";
1930 // This does the actual operand parsing. Don't parse any more if we have a
1931 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1932 // just want to parse the "lock" as the first instruction and the "incl" as
1934 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1936 // Parse '*' modifier.
1937 if (getLexer().is(AsmToken::Star))
1938 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1940 // Read the operands.
1942 if (X86Operand *Op = ParseOperand()) {
1943 Operands.push_back(Op);
1944 if (!HandleAVX512Operand(Operands, *Op))
1947 Parser.eatToEndOfStatement();
1950 // check for comma and eat it
1951 if (getLexer().is(AsmToken::Comma))
1957 if (getLexer().isNot(AsmToken::EndOfStatement))
1958 return ErrorAndEatStatement(getLexer().getLoc(),
1959 "unexpected token in argument list");
1962 // Consume the EndOfStatement or the prefix separator Slash
1963 if (getLexer().is(AsmToken::EndOfStatement) ||
1964 (isPrefix && getLexer().is(AsmToken::Slash)))
1967 if (ExtraImmOp && isParsingIntelSyntax())
1968 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1970 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1971 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1972 // documented form in various unofficial manuals, so a lot of code uses it.
1973 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1974 Operands.size() == 3) {
1975 X86Operand &Op = *(X86Operand*)Operands.back();
// Matches exactly "(%dx)": no segment, constant zero displacement, base DX,
// no index.
1976 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1977 isa<MCConstantExpr>(Op.Mem.Disp) &&
1978 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1979 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1980 SMLoc Loc = Op.getEndLoc();
1981 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1985 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1986 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1987 Operands.size() == 3) {
1988 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1989 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1990 isa<MCConstantExpr>(Op.Mem.Disp) &&
1991 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1992 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1993 SMLoc Loc = Op.getEndLoc();
1994 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1999 // Append default arguments to "ins[bwld]"
2000 if (Name.startswith("ins") && Operands.size() == 1 &&
2001 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): both syntax branches below push the same operands in the
// same order (DX, then the DI memory operand) -- confirm this is intended.
2003 if (isParsingIntelSyntax()) {
2004 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2005 Operands.push_back(DefaultMemDIOperand(NameLoc));
2007 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2008 Operands.push_back(DefaultMemDIOperand(NameLoc));
2012 // Append default arguments to "outs[bwld]"
2013 if (Name.startswith("outs") && Operands.size() == 1 &&
2014 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2015 Name == "outsd" )) {
// NOTE(review): as with "ins" above, both branches push identical operands
// (the SI memory operand, then DX) -- confirm.
2016 if (isParsingIntelSyntax()) {
2017 Operands.push_back(DefaultMemSIOperand(NameLoc));
2018 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2020 Operands.push_back(DefaultMemSIOperand(NameLoc));
2021 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2025 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2026 // values of $SIREG according to the mode. It would be nice if this
2027 // could be achieved with InstAlias in the tables.
2028 if (Name.startswith("lods") && Operands.size() == 1 &&
2029 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2030 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2031 Operands.push_back(DefaultMemSIOperand(NameLoc));
2033 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2034 // values of $DIREG according to the mode. It would be nice if this
2035 // could be achieved with InstAlias in the tables.
2036 if (Name.startswith("stos") && Operands.size() == 1 &&
2037 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2038 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2039 Operands.push_back(DefaultMemDIOperand(NameLoc));
2041 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2042 // values of $DIREG according to the mode. It would be nice if this
2043 // could be achieved with InstAlias in the tables.
2044 if (Name.startswith("scas") && Operands.size() == 1 &&
2045 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2046 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2047 Operands.push_back(DefaultMemDIOperand(NameLoc));
2049 // Add default SI and DI operands to "cmps[bwlq]".
2050 if (Name.startswith("cmps") &&
2051 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2052 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2053 if (Operands.size() == 1) {
// Intel order is SI then DI; the other branch pushes DI then SI.
2054 if (isParsingIntelSyntax()) {
2055 Operands.push_back(DefaultMemSIOperand(NameLoc));
2056 Operands.push_back(DefaultMemDIOperand(NameLoc));
2058 Operands.push_back(DefaultMemDIOperand(NameLoc));
2059 Operands.push_back(DefaultMemSIOperand(NameLoc));
2061 } else if (Operands.size() == 3) {
// With explicit operands, only check that the two operands agree.
2062 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2063 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2064 if (!doSrcDstMatch(Op, Op2))
2065 return Error(Op.getStartLoc(),
2066 "mismatching source and destination index registers");
2070 // Add default SI and DI operands to "movs[bwlq]".
2071 if ((Name.startswith("movs") &&
2072 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2073 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2074 (Name.startswith("smov") &&
2075 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2076 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2077 if (Operands.size() == 1) {
// An operand-less "movsd" is renamed to "movsl". The old mnemonic token is
// freed with a raw delete before the slot is overwritten.
2078 if (Name == "movsd") {
2079 delete Operands.back();
2080 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
// Intel order is DI then SI; the other branch pushes SI then DI.
2082 if (isParsingIntelSyntax()) {
2083 Operands.push_back(DefaultMemDIOperand(NameLoc));
2084 Operands.push_back(DefaultMemSIOperand(NameLoc));
2086 Operands.push_back(DefaultMemSIOperand(NameLoc));
2087 Operands.push_back(DefaultMemDIOperand(NameLoc));
2089 } else if (Operands.size() == 3) {
2090 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2091 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2092 if (!doSrcDstMatch(Op, Op2))
2093 return Error(Op.getStartLoc(),
2094 "mismatching source and destination index registers");
2098 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2100 if ((Name.startswith("shr") || Name.startswith("sar") ||
2101 Name.startswith("shl") || Name.startswith("sal") ||
2102 Name.startswith("rcl") || Name.startswith("rcr") ||
2103 Name.startswith("rol") || Name.startswith("ror")) &&
2104 Operands.size() == 3) {
// The shift count is the last operand in Intel syntax and the first operand
// in the other branch; a constant count of 1 is removed.
2105 if (isParsingIntelSyntax()) {
2107 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2108 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2109 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2111 Operands.pop_back();
2114 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2115 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2116 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2118 Operands.erase(Operands.begin() + 1);
2123 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2124 // instalias with an immediate operand yet.
2125 if (Name == "int" && Operands.size() == 2) {
2126 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2127 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2128 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2130 Operands.erase(Operands.begin() + 1);
2131 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Build a replacement instruction in TmpInst with the given Opcode, taking
// register operand(s) for Reg followed by operand 0 of Inst.
// NOTE(review): presumably one of the two Reg operands is conditional on the
// isCmp flag passed by the callers, and TmpInst then replaces Inst -- the
// guard and the final assignment are not visible here; confirm.
2138 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2141 TmpInst.setOpcode(Opcode);
2143 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2144 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2145 TmpInst.addOperand(Inst.getOperand(0));
// 16-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti16i8Value, hand off to convertToSExti8 using X86::AX.
// \param Opcode The opcode to switch to.
// \param isCmp Forwarded unchanged to convertToSExti8.
// NOTE(review): presumably returns false without touching Inst when the
// immediate check fails -- confirm.
2150 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2151 bool isCmp = false) {
2152 if (!Inst.getOperand(0).isImm() ||
2153 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2156 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti32i8Value, hand off to convertToSExti8 using X86::EAX.
// \param Opcode The opcode to switch to.
// \param isCmp Forwarded unchanged to convertToSExti8.
// NOTE(review): presumably returns false without touching Inst when the
// immediate check fails -- confirm.
2159 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2160 bool isCmp = false) {
2161 if (!Inst.getOperand(0).isImm() ||
2162 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2165 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti64i8Value, hand off to convertToSExti8 using X86::RAX.
// \param Opcode The opcode to switch to.
// \param isCmp Forwarded unchanged to convertToSExti8.
// NOTE(review): presumably returns false without touching Inst when the
// immediate check fails -- confirm.
2168 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2169 bool isCmp = false) {
2170 if (!Inst.getOperand(0).isImm() ||
2171 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2174 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match rewrite of Inst, selected by its opcode. Opcodes that are not
// listed return false. The caller loops on this function while it keeps
// returning true, so a true result is expected to mean "Inst was changed".
//
// Two families are handled:
//  * accumulator/immediate forms (AND/XOR/OR/CMP/ADD/SUB/ADC/SBB with the
//    i16/i32 immediate encodings) are forwarded to the convert*to*ri8 helpers;
//    only the CMP cases pass isCmp = true.
//  * register-to-register VMOV* forms may be switched to their _REV opcode,
//    depending on which operands are x86-64 extended registers.
// The Ops parameter is not referenced in the lines visible here.
2178 processInstruction(MCInst &Inst,
2179 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2180 switch (Inst.getOpcode()) {
2181 default: return false;
2182 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2183 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2184 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2185 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2186 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2187 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2188 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2189 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2190 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2191 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2192 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2193 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2194 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2195 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2196 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2197 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2198 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2199 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2200 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2201 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2202 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2203 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2204 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2205 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2206 case X86::VMOVAPDrr:
2207 case X86::VMOVAPDYrr:
2208 case X86::VMOVAPSrr:
2209 case X86::VMOVAPSYrr:
2210 case X86::VMOVDQArr:
2211 case X86::VMOVDQAYrr:
2212 case X86::VMOVDQUrr:
2213 case X86::VMOVDQUYrr:
2214 case X86::VMOVUPDrr:
2215 case X86::VMOVUPDYrr:
2216 case X86::VMOVUPSrr:
2217 case X86::VMOVUPSYrr: {
// The guard is true when operand 0 is an extended register or operand 1 is
// not one. NOTE(review): presumably that case bails out, so the _REV opcode is
// chosen only when operand 1 alone is extended — confirm; the motive (likely
// a shorter encoding) is not stated in the code.
2218 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2219 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its operand-reversed (_REV) counterpart.
2223 switch (Inst.getOpcode()) {
2224 default: llvm_unreachable("Invalid opcode");
2225 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2226 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2227 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2228 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2229 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2230 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2231 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2232 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2233 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2234 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2235 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2236 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2238 Inst.setOpcode(NewOpc);
2242 case X86::VMOVSSrr: {
// Same test as above, but these forms are checked on operands 0 and 2.
2243 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2244 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2247 switch (Inst.getOpcode()) {
2248 default: llvm_unreachable("Invalid opcode");
2249 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2250 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2252 Inst.setOpcode(NewOpc);
// Forward declaration: MatchAndEmitInstruction uses this to turn a feature bit
// into text for its "instruction requires:" diagnostics.
// NOTE(review): the definition presumably comes from X86GenAsmMatcher.inc via
// GET_SUBTARGET_FEATURE_NAME at the end of this file — confirm.
2258 static const char *getSubtargetFeatureName(unsigned Val);
// Single exit point for emitting a matched instruction: Inst is first passed
// to the instrumentation object together with its parsed Operands, then
// emitted to the streamer Out with the subtarget info STI.
2260 void X86AsmParser::EmitInstruction(
2261 MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
2263 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2265 Out.EmitInstruction(Inst, STI);
// Matches the parsed Operands against the instruction table and, unless
// MatchingInlineAsm is set, emits the resulting instruction to Out.
//
//   IDLoc             location of the mnemonic; used for most diagnostics.
//   Opcode            set to the matched instruction's opcode on success.
//   Operands          parsed operands; Operands[0] must be the mnemonic token.
//   ErrorInfo         filled in by the direct match attempt; read afterwards as
//                     a feature bit mask or as an operand index, depending on
//                     the failure kind.
//   MatchingInlineAsm when true nothing is emitted, and the flag is forwarded
//                     to the matcher and to Error().
//
// Stages:
//   1. Wait-prefixed x87 mnemonics (fstsw, fstcw, finit, ...) first get an
//      X86::WAIT instruction; the mnemonic is then replaced by its "fn" form.
//   2. A direct match is attempted.
//   3. If that fails on the mnemonic or on an operand, the mnemonic is retried
//      with each of four size suffixes appended; exactly one successful
//      variant is accepted.
//   4. Otherwise a diagnostic is built from the four suffix-match results.
// NOTE(review): the visible failure paths return the result of Error(...);
// other return statements are not visible here.
2269 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2270 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2271 MCStreamer &Out, unsigned &ErrorInfo,
2272 bool MatchingInlineAsm) {
2273 assert(!Operands.empty() && "Unexpect empty operand list!");
2274 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2275 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2276 ArrayRef<SMRange> EmptyRanges = None;
2278 // First, handle aliases that expand to multiple instructions.
2279 // FIXME: This should be replaced with a real .td file alias mechanism.
2280 // Also, MatchInstructionImpl should actually *do* the EmitInstruction call.
2282 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2283 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2284 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2285 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit the WAIT half of the alias now; the no-wait form is matched below.
2287 Inst.setOpcode(X86::WAIT);
2289 if (!MatchingInlineAsm)
2290 EmitInstruction(Inst, Operands, Out);
// Map the mnemonic to its no-wait ("fn") spelling. The explicit 'w'-suffixed
// spellings (fstcww, fstsww) map to the same replacement as the plain ones.
2293 StringSwitch<const char*>(Op->getToken())
2294 .Case("finit", "fninit")
2295 .Case("fsave", "fnsave")
2296 .Case("fstcw", "fnstcw")
2297 .Case("fstcww", "fnstcw")
2298 .Case("fstenv", "fnstenv")
2299 .Case("fstsw", "fnstsw")
2300 .Case("fstsww", "fnstsw")
2301 .Case("fclex", "fnclex")
2303 assert(Repl && "Unknown wait-prefixed instruction");
2305 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match failed on an operand rather than on the
// mnemonic; this selects the diagnostic when every suffixed retry fails with
// Match_MnemonicFail.
2308 bool WasOriginallyInvalidOperand = false;
2311 // First, try a direct match.
2312 switch (MatchInstructionImpl(Operands, Inst,
2313 ErrorInfo, MatchingInlineAsm,
2314 isParsingIntelSyntax())) {
2317 // Some instructions need post-processing to, for example, tweak which
2318 // encoding is selected. Loop on it while changes happen so the
2319 // individual transformations can chain off each other.
2320 if (!MatchingInlineAsm)
2321 while (processInstruction(Inst, Operands))
2325 if (!MatchingInlineAsm)
2326 EmitInstruction(Inst, Operands, Out);
2327 Opcode = Inst.getOpcode();
2329 case Match_MissingFeature: {
2330 assert(ErrorInfo && "Unknown missing feature!");
2331 // Special case the error message for the very common case where only
2332 // a single subtarget feature is missing.
2333 std::string Msg = "instruction requires:";
// Here ErrorInfo is a bit mask of missing features: append the name of each
// set bit. The loop bound leaves out the top bit of ErrorInfo.
2335 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2336 if (ErrorInfo & Mask) {
2338 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2342 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2344 case Match_InvalidOperand:
2345 WasOriginallyInvalidOperand = true;
2347 case Match_MnemonicFail:
2351 // FIXME: Ideally, we would only attempt suffix matches for things which are
2352 // valid prefixes, and we could just infer the right unambiguous
2353 // type. However, that requires substantially more matcher support than is
// currently available.
2356 // Change the operand to point to a temporary token.
2357 StringRef Base = Op->getToken();
2358 SmallString<16> Tmp;
2361 Op->setTokenValue(Tmp.str());
2363 // If this instruction starts with an 'f', then it is a floating point stack
2364 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2365 // 80-bit floating point, which use the suffixes s,l,t respectively.
2367 // Otherwise, we assume that this may be an integer instruction, which comes
2368 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" pads the floating point list to four entries, so that
// Suffixes[3] read below is a NUL character for the 'f' case.
2369 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2371 // Check for the various suffix matches.
// Each attempt overwrites the character at index Base.size() of Tmp with the
// next candidate suffix and re-runs the matcher on the same Operands.
2372 Tmp[Base.size()] = Suffixes[0];
// ErrorInfoIgnore receives the per-attempt error info. ErrorInfoMissingFeature
// keeps the value from the most recent attempt that reported
// Match_MissingFeature.
2373 unsigned ErrorInfoIgnore;
2374 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2375 unsigned Match1, Match2, Match3, Match4;
2377 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2378 MatchingInlineAsm, isParsingIntelSyntax());
2379 // If this returned as a missing feature failure, remember that.
2380 if (Match1 == Match_MissingFeature)
2381 ErrorInfoMissingFeature = ErrorInfoIgnore;
2382 Tmp[Base.size()] = Suffixes[1];
2383 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2384 MatchingInlineAsm, isParsingIntelSyntax());
2385 // If this returned as a missing feature failure, remember that.
2386 if (Match2 == Match_MissingFeature)
2387 ErrorInfoMissingFeature = ErrorInfoIgnore;
2388 Tmp[Base.size()] = Suffixes[2];
2389 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2390 MatchingInlineAsm, isParsingIntelSyntax());
2391 // If this returned as a missing feature failure, remember that.
2392 if (Match3 == Match_MissingFeature)
2393 ErrorInfoMissingFeature = ErrorInfoIgnore;
2394 Tmp[Base.size()] = Suffixes[3];
2395 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2396 MatchingInlineAsm, isParsingIntelSyntax());
2397 // If this returned as a missing feature failure, remember that.
2398 if (Match4 == Match_MissingFeature)
2399 ErrorInfoMissingFeature = ErrorInfoIgnore;
2401 // Restore the old token.
2402 Op->setTokenValue(Base);
2404 // If exactly one matched, then we treat that as a successful match (and the
2405 // instruction will already have been filled in correctly, since the failing
2406 // matches won't have modified it).
2407 unsigned NumSuccessfulMatches =
2408 (Match1 == Match_Success) + (Match2 == Match_Success) +
2409 (Match3 == Match_Success) + (Match4 == Match_Success);
2410 if (NumSuccessfulMatches == 1) {
2412 if (!MatchingInlineAsm)
2413 EmitInstruction(Inst, Operands, Out);
2414 Opcode = Inst.getOpcode();
2418 // Otherwise, the match failed, try to produce a decent error message.
2420 // If we had multiple suffix matches, then identify this as an ambiguous
// match.
2422 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters of the variants that matched, in suffix order.
2424 unsigned NumMatches = 0;
2425 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2426 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2427 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2428 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2430 SmallString<126> Msg;
2431 raw_svector_ostream OS(Msg);
2432 OS << "ambiguous instructions require an explicit suffix (could be ";
2433 for (unsigned i = 0; i != NumMatches; ++i) {
2436 if (i + 1 == NumMatches)
2438 OS << "'" << Base << MatchChars[i] << "'";
2441 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2445 // Okay, we know that none of the variants matched successfully.
2447 // If all of the instructions reported an invalid mnemonic, then the original
2448 // mnemonic was invalid.
2449 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2450 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2451 if (!WasOriginallyInvalidOperand) {
2452 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2454 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2455 Ranges, MatchingInlineAsm);
2458 // Recover location info for the operand if we know which was the problem.
// Here ErrorInfo (from the direct match) is read as an operand index.
// NOTE(review): ~0U looks like the matcher's "no specific operand" value —
// confirm.
2459 if (ErrorInfo != ~0U) {
2460 if (ErrorInfo >= Operands.size())
2461 return Error(IDLoc, "too few operands for instruction",
2462 EmptyRanges, MatchingInlineAsm);
2464 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2465 if (Operand->getStartLoc().isValid()) {
2466 SMRange OperandRange = Operand->getLocRange();
2467 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2468 OperandRange, MatchingInlineAsm);
2472 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2476 // If one instruction matched with a missing feature, report this as a
// missing feature.
2478 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2479 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2480 std::string Msg = "instruction requires:";
// Same feature-bit walk as for the direct match, over the mask saved from the
// suffixed attempt.
2482 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2483 if (ErrorInfoMissingFeature & Mask) {
2485 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2489 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2492 // If one instruction matched with an invalid operand, report this as an
// operand failure.
2494 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2495 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2496 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2501 // If all of these were an outright failure, report it in a useless way.
2502 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2503 EmptyRanges, MatchingInlineAsm);
// Dispatches the target-specific directives handled by this parser:
//   .word          -> ParseDirectiveWord with a size of 2
//   .code*         -> ParseDirectiveCode (which rejects unknown spellings)
//   .att_syntax    -> assembler dialect 0
//   .intel_syntax  -> assembler dialect 1
// Every directive except .word is recognised by prefix (startswith), not by
// exact comparison.
2508 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2509 StringRef IDVal = DirectiveID.getIdentifier();
2510 if (IDVal == ".word")
2511 return ParseDirectiveWord(2, DirectiveID.getLoc());
2512 else if (IDVal.startswith(".code"))
2513 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2514 else if (IDVal.startswith(".att_syntax")) {
2515 getParser().setAssemblerDialect(0);
2517 } else if (IDVal.startswith(".intel_syntax")) {
2518 getParser().setAssemblerDialect(1);
// Anything left on the line is inspected for the "noprefix" modifier.
2519 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2520 // FIXME: Handle noprefix
2521 if (Parser.getTok().getString() == "noprefix")
2529 /// ParseDirectiveWord
2530 /// ::= .word [ expression (, expression)* ]
/// Size is the width handed to the streamer's EmitValue for each parsed
/// expression. L is the location used for the "unexpected token" diagnostic.
2531 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list is accepted: nothing is parsed if the statement ends
// right away.
2532 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2534 const MCExpr *Value;
2535 if (getParser().parseExpression(Value))
2538 getParser().getStreamer().EmitValue(Value, Size);
2540 if (getLexer().is(AsmToken::EndOfStatement))
2543 // FIXME: Improve diagnostic.
// After an expression, only a comma or end of statement is accepted.
2544 if (getLexer().isNot(AsmToken::Comma)) {
2545 Error(L, "unexpected token in directive");
2556 /// ParseDirectiveCode
2557 /// ::= .code16 | .code32 | .code64
/// For each recognised spelling, the parser mode is switched and the matching
/// assembler flag is sent to the streamer only when the parser is not already
/// in that mode. Any other spelling is reported at L as an unknown directive.
2558 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2559 if (IDVal == ".code16") {
2561 if (!is16BitMode()) {
2562 SwitchMode(X86::Mode16Bit);
2563 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2565 } else if (IDVal == ".code32") {
2567 if (!is32BitMode()) {
2568 SwitchMode(X86::Mode32Bit);
2569 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2571 } else if (IDVal == ".code64") {
2573 if (!is64BitMode()) {
2574 SwitchMode(X86::Mode64Bit);
2575 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2578 Error(L, "unknown directive " + IDVal);
2585 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit x86 target.
2586 extern "C" void LLVMInitializeX86AsmParser() {
2587 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2588 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2591 #define GET_REGISTER_MATCHER
2592 #define GET_MATCHER_IMPLEMENTATION
2593 #define GET_SUBTARGET_FEATURE_NAME
2594 #include "X86GenAsmMatcher.inc"