1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCParser/MCAsmLexer.h"
21 #include "llvm/MC/MCParser/MCAsmParser.h"
22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCTargetAsmParser.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/TargetRegistry.h"
30 #include "llvm/Support/raw_ostream.h"
// NOTE(review): this listing is elided — the embedded original line numbers
// jump (37, 52, 55, 57, 58, ...), so initializers and bodies below are
// incomplete; comments here are hedged accordingly.
// Operator-precedence table indexed by InfixCalculatorTok (initializer elided).
37 static const char OpPrecedence[] = {
// Target asm parser for X86; handles both AT&T and Intel syntax.
52 class X86AsmParser : public MCTargetAsmParser {
55 ParseInstructionInfo *InstInfo;
// Record the current token's location (rest of the body is elided here —
// presumably it also lexes past the token and returns Result; TODO confirm).
57 SMLoc consumeToken() {
58 SMLoc Result = Parser.getTok().getLoc();
// Token kinds for the infix-to-postfix expression calculator (body elided).
63 enum InfixCalculatorTok {
// Shunting-yard style calculator used to evaluate Intel-syntax address
// expressions. Converts infix input to a postfix stack, then executes it.
78 class InfixCalculator {
79 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
80 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
81 SmallVector<ICToken, 4> PostfixStack;
// Pop the top operand (IC_IMM or IC_REGISTER) off the postfix stack.
84 int64_t popOperand() {
85 assert (!PostfixStack.empty() && "Poped an empty stack!");
86 ICToken Op = PostfixStack.pop_back_val();
87 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
88 && "Expected and immediate or register!");
91 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
92 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
93 "Unexpected operand!");
94 PostfixStack.push_back(std::make_pair(Op, Val));
97 void popOperator() { InfixOperatorStack.pop_back(); }
// Push an operator, moving higher-precedence operators already on the
// stack over to the postfix stack first (classic shunting-yard step).
98 void pushOperator(InfixCalculatorTok Op) {
99 // Push the new operator if the stack is empty.
100 if (InfixOperatorStack.empty()) {
101 InfixOperatorStack.push_back(Op);
105 // Push the new operator if it has a higher precedence than the operator
106 // on the top of the stack or the operator on the top of the stack is a
108 unsigned Idx = InfixOperatorStack.size() - 1;
109 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
110 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
111 InfixOperatorStack.push_back(Op);
115 // The operator on the top of the stack has higher precedence than the
117 unsigned ParenCount = 0;
119 // Nothing to process.
120 if (InfixOperatorStack.empty())
123 Idx = InfixOperatorStack.size() - 1;
124 StackOp = InfixOperatorStack[Idx];
125 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
128 // If we have an even parentheses count and we see a left parentheses,
129 // then stop processing.
130 if (!ParenCount && StackOp == IC_LPAREN)
133 if (StackOp == IC_RPAREN) {
135 InfixOperatorStack.pop_back();
136 } else if (StackOp == IC_LPAREN) {
138 InfixOperatorStack.pop_back();
140 InfixOperatorStack.pop_back();
141 PostfixStack.push_back(std::make_pair(StackOp, 0));
144 // Push the new operator.
145 InfixOperatorStack.push_back(Op);
// execute(): flush remaining operators, then evaluate the postfix stack.
148 // Push any remaining operators onto the postfix stack.
149 while (!InfixOperatorStack.empty()) {
150 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
151 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
152 PostfixStack.push_back(std::make_pair(StackOp, 0));
155 if (PostfixStack.empty())
158 SmallVector<ICToken, 16> OperandStack;
159 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
160 ICToken Op = PostfixStack[i];
161 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
162 OperandStack.push_back(Op);
164 assert (OperandStack.size() > 1 && "Too few operands.");
166 ICToken Op2 = OperandStack.pop_back_val();
167 ICToken Op1 = OperandStack.pop_back_val();
170 report_fatal_error("Unexpected operator!");
// Binary operators below; +/- allow a register operand (address arithmetic),
// all other operators assert both operands are immediates.
173 Val = Op1.second + Op2.second;
174 OperandStack.push_back(std::make_pair(IC_IMM, Val));
177 Val = Op1.second - Op2.second;
178 OperandStack.push_back(std::make_pair(IC_IMM, Val));
181 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
182 "Multiply operation with an immediate and a register!");
183 Val = Op1.second * Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Divide operation with an immediate and a register!");
189 assert (Op2.second != 0 && "Division by zero!");
190 Val = Op1.second / Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Or operation with an immediate and a register!");
196 Val = Op1.second | Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "And operation with an immediate and a register!");
202 Val = Op1.second & Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "Left shift operation with an immediate and a register!");
208 Val = Op1.second << Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Right shift operation with an immediate and a register!");
214 Val = Op1.second >> Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
220 assert (OperandStack.size() == 1 && "Expected a single result.");
221 return OperandStack.pop_back_val().second;
// States for the Intel-expression parser state machine (enumerators elided).
225 enum IntelExprState {
// State machine that parses Intel-syntax memory/immediate expressions,
// accumulating BaseReg/IndexReg/Scale/displacement via the InfixCalculator.
244 class IntelExprStateMachine {
245 IntelExprState State, PrevState;
246 unsigned BaseReg, IndexReg, TmpReg, Scale;
250 bool StopOnLBrac, AddImmPrefix;
252 InlineAsmIdentifierInfo Info;
254 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
255 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
256 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
257 AddImmPrefix(addimmprefix) { Info.clear(); }
// Simple accessors for the accumulated address components.
259 unsigned getBaseReg() { return BaseReg; }
260 unsigned getIndexReg() { return IndexReg; }
261 unsigned getScale() { return Scale; }
262 const MCExpr *getSym() { return Sym; }
263 StringRef getSymName() { return SymName; }
264 int64_t getImm() { return Imm + IC.execute(); }
265 bool isValidEndState() {
266 return State == IES_RBRAC || State == IES_INTEGER;
268 bool getStopOnLBrac() { return StopOnLBrac; }
269 bool getAddImmPrefix() { return AddImmPrefix; }
270 bool hadError() { return State == IES_ERROR; }
272 InlineAsmIdentifierInfo &getIdentifierInfo() {
// onXXX handlers below each save the current state, transition on the
// incoming token, push the matching calculator operator, and record
// PrevState (state-transition switch bodies are elided in this listing).
277 IntelExprState CurrState = State;
286 IC.pushOperator(IC_OR);
289 PrevState = CurrState;
292 IntelExprState CurrState = State;
301 IC.pushOperator(IC_AND);
304 PrevState = CurrState;
307 IntelExprState CurrState = State;
316 IC.pushOperator(IC_LSHIFT);
319 PrevState = CurrState;
322 IntelExprState CurrState = State;
331 IC.pushOperator(IC_RSHIFT);
334 PrevState = CurrState;
337 IntelExprState CurrState = State;
346 IC.pushOperator(IC_PLUS);
347 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
348 // If we already have a BaseReg, then assume this is the IndexReg with
353 assert (!IndexReg && "BaseReg/IndexReg already set!");
360 PrevState = CurrState;
363 IntelExprState CurrState = State;
378 // Only push the minus operator if it is not a unary operator.
379 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
380 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
381 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
382 IC.pushOperator(IC_MINUS);
383 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
384 // If we already have a BaseReg, then assume this is the IndexReg with
389 assert (!IndexReg && "BaseReg/IndexReg already set!");
396 PrevState = CurrState;
// A register token: becomes BaseReg/IndexReg, or completes 'Scale * Reg'.
398 void onRegister(unsigned Reg) {
399 IntelExprState CurrState = State;
406 State = IES_REGISTER;
408 IC.pushOperand(IC_REGISTER);
411 // Index Register - Scale * Register
412 if (PrevState == IES_INTEGER) {
413 assert (!IndexReg && "IndexReg already set!");
414 State = IES_REGISTER;
416 // Get the scale and replace the 'Scale * Register' with '0'.
417 Scale = IC.popOperand();
418 IC.pushOperand(IC_IMM);
425 PrevState = CurrState;
// A symbol reference: remembered as Sym/SymName, enters the calculator as 0.
427 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
437 SymName = SymRefName;
438 IC.pushOperand(IC_IMM);
// An integer literal: may be a scale ('Reg * Imm'), a unary-minus operand,
// or a plain immediate. Returns true (with ErrMsg set) on a bad scale.
442 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
443 IntelExprState CurrState = State;
458 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
459 // Index Register - Register * Scale
460 assert (!IndexReg && "IndexReg already set!");
463 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
464 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
467 // Get the scale and replace the 'Register * Scale' with '0'.
469 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
470 PrevState == IES_OR || PrevState == IES_AND ||
471 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
472 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
473 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
474 CurrState == IES_MINUS) {
475 // Unary minus. No need to pop the minus operand because it was never
477 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
479 IC.pushOperand(IC_IMM, TmpInt);
483 PrevState = CurrState;
495 State = IES_MULTIPLY;
496 IC.pushOperator(IC_MULTIPLY);
509 IC.pushOperator(IC_DIVIDE);
// '[': treated like '+' for the displacement calculation.
521 IC.pushOperator(IC_PLUS);
526 IntelExprState CurrState = State;
535 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
536 // If we already have a BaseReg, then assume this is the IndexReg with
541 assert (!IndexReg && "BaseReg/IndexReg already set!");
548 PrevState = CurrState;
551 IntelExprState CurrState = State;
565 // FIXME: We don't handle this type of unary minus, yet.
566 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
567 PrevState == IES_OR || PrevState == IES_AND ||
568 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
569 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
570 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
571 CurrState == IES_MINUS) {
576 IC.pushOperator(IC_LPAREN);
579 PrevState = CurrState;
591 IC.pushOperator(IC_RPAREN);
// Convenience accessors for the underlying generic parser / lexer.
597 MCAsmParser &getParser() const { return Parser; }
599 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Report an error; suppressed while matching MS inline asm (returns true
// without diagnosing so the caller can fail the match quietly).
601 bool Error(SMLoc L, const Twine &Msg,
602 ArrayRef<SMRange> Ranges = None,
603 bool MatchingInlineAsm = false) {
604 if (MatchingInlineAsm) return true;
605 return Parser.Error(L, Msg, Ranges);
// Like Error(), but first discards the rest of the statement so parsing
// can resume at the next one.
608 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
609 ArrayRef<SMRange> Ranges = None,
610 bool MatchingInlineAsm = false) {
611 Parser.eatToEndOfStatement();
612 return Error(L, Msg, Ranges, MatchingInlineAsm);
615 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand-parsing entry points (AT&T and Intel flavors). Bodies are
// defined out of line, beyond this listing.
620 X86Operand *DefaultMemSIOperand(SMLoc Loc);
621 X86Operand *DefaultMemDIOperand(SMLoc Loc);
622 X86Operand *ParseOperand();
623 X86Operand *ParseATTOperand();
624 X86Operand *ParseIntelOperand();
625 X86Operand *ParseIntelOffsetOfOperator();
626 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
627 X86Operand *ParseIntelOperator(unsigned OpKind);
628 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
629 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
631 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
632 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
633 int64_t ImmDisp, unsigned Size);
634 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
635 InlineAsmIdentifierInfo &Info,
636 bool IsUnevaluatedOperand, SMLoc &End);
638 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
640 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
641 unsigned BaseReg, unsigned IndexReg,
642 unsigned Scale, SMLoc Start, SMLoc End,
643 unsigned Size, StringRef Identifier,
644 InlineAsmIdentifierInfo &Info);
646 bool ParseDirectiveWord(unsigned Size, SMLoc L);
647 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
649 bool processInstruction(MCInst &Inst,
650 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
652 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
653 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
654 MCStreamer &Out, unsigned &ErrorInfo,
655 bool MatchingInlineAsm);
657 /// doSrcDstMatch - Returns true if operands are matching in their
658 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
659 /// the parsing mode (Intel vs. AT&T).
660 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
662 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
663 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
664 /// \return \c true if no parsing errors occurred, \c false otherwise.
665 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
666 const MCParsedAsmOperand &Op);
// Mode queries read the subtarget feature bits directly.
668 bool is64BitMode() const {
669 // FIXME: Can tablegen auto-generate this?
670 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
672 bool is32BitMode() const {
673 // FIXME: Can tablegen auto-generate this?
674 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
676 bool is16BitMode() const {
677 // FIXME: Can tablegen auto-generate this?
678 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Toggle between 16/32/64-bit modes by flipping the mode feature bits
// (toggling oldMode|mode clears the old bit and sets the new one).
680 void SwitchMode(uint64_t mode) {
681 uint64_t oldMode = STI.getFeatureBits() &
682 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
683 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
684 setAvailableFeatures(FB);
685 assert(mode == (STI.getFeatureBits() &
686 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Dialect 0 = AT&T, nonzero = Intel.
689 bool isParsingIntelSyntax() {
690 return getParser().getAssemblerDialect();
693 /// @name Auto-generated Matcher Functions
696 #define GET_ASSEMBLER_HEADER
697 #include "X86GenAsmMatcher.inc"
702 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
703 const MCInstrInfo &MII)
704 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
706 // Initialize the set of available features.
707 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// MCTargetAsmParser virtual interface.
709 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
711 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
713 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
715 virtual bool ParseDirective(AsmToken DirectiveID);
717 } // end anonymous namespace
719 /// @name Auto-generated Match Functions
// Generated by tablegen (X86GenAsmMatcher.inc); maps a register name
// string to its X86:: register number, 0 if unknown.
722 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Return true if Value is an 8-bit immediate that can
/// be sign-extended to a 16-bit operand. Accepts 0..0x7F, plus the 16-bit
/// (0xFF80..0xFFFF) and 64-bit (0xFFFF...FF80..~0) sign-extended encodings
/// of -128..-1, since parsed immediates may arrive in either width.
static bool isImmSExti16i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
/// isImmSExti32i8Value - Return true if Value is an 8-bit immediate that can
/// be sign-extended to a 32-bit operand. Accepts 0..0x7F, plus the 32-bit
/// (0xFFFFFF80..0xFFFFFFFF) and 64-bit sign-extended encodings of -128..-1.
static bool isImmSExti32i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
/// isImmZExtu32u8Value - Return true if Value is an 8-bit immediate that can
/// be zero-extended to a 32-bit operand, i.e. Value fits in 0..0xFF.
static bool isImmZExtu32u8Value(uint64_t Value) {
  return (Value <= 0x00000000000000FFULL);
}
/// isImmSExti64i8Value - Return true if Value is an 8-bit immediate that can
/// be sign-extended to a 64-bit operand: 0..0x7F, or the 64-bit
/// two's-complement encoding of -128..-1.
static bool isImmSExti64i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
/// isImmSExti64i32Value - Return true if Value is a 32-bit immediate that can
/// be sign-extended to a 64-bit operand: 0..0x7FFFFFFF, or the 64-bit
/// two's-complement encoding of INT32_MIN..-1.
static bool isImmSExti64i32Value(uint64_t Value) {
  return ((                                  Value <= 0x000000007FFFFFFFULL) ||
          (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
753 /// X86Operand - Instances of this class represent a parsed X86 machine
755 struct X86Operand : public MCParsedAsmOperand {
// NOTE(review): the listing elides this struct's Kind/Tok/Reg/Imm/Mem union
// members and several predicates; only what is shown below is verifiable.
763 SMLoc StartLoc, EndLoc;
798 X86Operand(KindTy K, SMLoc Start, SMLoc End)
799 : Kind(K), StartLoc(Start), EndLoc(End) {}
801 StringRef getSymName() { return SymName; }
802 void *getOpDecl() { return OpDecl; }
804 /// getStartLoc - Get the location of the first token of this operand.
805 SMLoc getStartLoc() const { return StartLoc; }
806 /// getEndLoc - Get the location of the last token of this operand.
807 SMLoc getEndLoc() const { return EndLoc; }
808 /// getLocRange - Get the range between the first and last token of this
810 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
811 /// getOffsetOfLoc - Get the location of the offset operator.
812 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
814 virtual void print(raw_ostream &OS) const {}
// Kind-checked accessors: each asserts the operand is of the right kind.
816 StringRef getToken() const {
817 assert(Kind == Token && "Invalid access!");
818 return StringRef(Tok.Data, Tok.Length);
820 void setTokenValue(StringRef Value) {
821 assert(Kind == Token && "Invalid access!");
822 Tok.Data = Value.data();
823 Tok.Length = Value.size();
826 unsigned getReg() const {
827 assert(Kind == Register && "Invalid access!");
831 const MCExpr *getImm() const {
832 assert(Kind == Immediate && "Invalid access!");
836 const MCExpr *getMemDisp() const {
837 assert(Kind == Memory && "Invalid access!");
840 unsigned getMemSegReg() const {
841 assert(Kind == Memory && "Invalid access!");
844 unsigned getMemBaseReg() const {
845 assert(Kind == Memory && "Invalid access!");
848 unsigned getMemIndexReg() const {
849 assert(Kind == Memory && "Invalid access!");
852 unsigned getMemScale() const {
853 assert(Kind == Memory && "Invalid access!");
857 bool isToken() const {return Kind == Token; }
859 bool isImm() const { return Kind == Immediate; }
// isImmSExtiNiM: a non-constant expression is optimistically accepted and
// left for relaxation; a constant is range-checked by the matching helper.
861 bool isImmSExti16i8() const {
865 // If this isn't a constant expr, just assume it fits and let relaxation
867 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
871 // Otherwise, check the value is in a range that makes sense for this
873 return isImmSExti16i8Value(CE->getValue());
875 bool isImmSExti32i8() const {
879 // If this isn't a constant expr, just assume it fits and let relaxation
881 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
885 // Otherwise, check the value is in a range that makes sense for this
887 return isImmSExti32i8Value(CE->getValue());
889 bool isImmZExtu32u8() const {
893 // If this isn't a constant expr, just assume it fits and let relaxation
895 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
899 // Otherwise, check the value is in a range that makes sense for this
901 return isImmZExtu32u8Value(CE->getValue());
903 bool isImmSExti64i8() const {
907 // If this isn't a constant expr, just assume it fits and let relaxation
909 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
913 // Otherwise, check the value is in a range that makes sense for this
915 return isImmSExti64i8Value(CE->getValue());
917 bool isImmSExti64i32() const {
921 // If this isn't a constant expr, just assume it fits and let relaxation
923 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
927 // Otherwise, check the value is in a range that makes sense for this
929 return isImmSExti64i32Value(CE->getValue());
// OffsetOfLoc has a non-null pointer only when 'offset' was parsed.
932 bool isOffsetOf() const {
933 return OffsetOfLoc.getPointer();
936 bool needAddressOf() const {
// isMemN: unsized memory operands (Mem.Size == 0) match every width.
940 bool isMem() const { return Kind == Memory; }
941 bool isMem8() const {
942 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
944 bool isMem16() const {
945 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
947 bool isMem32() const {
948 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
950 bool isMem64() const {
951 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
953 bool isMem80() const {
954 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
956 bool isMem128() const {
957 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
959 bool isMem256() const {
960 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
962 bool isMem512() const {
963 return Kind == Memory && (!Mem.Size || Mem.Size == 512);
// VSIB (gather/scatter) memory: index register must be a vector register.
966 bool isMemVX32() const {
967 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
968 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
970 bool isMemVY32() const {
971 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
972 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
974 bool isMemVX64() const {
975 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
976 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
978 bool isMemVY64() const {
979 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
980 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
982 bool isMemVZ32() const {
983 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
984 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
986 bool isMemVZ64() const {
987 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
988 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
// Absolute memory: displacement only, no segment/base/index and scale 1.
991 bool isAbsMem() const {
992 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
993 !getMemIndexReg() && getMemScale() == 1;
// String-op source operand: plain (%rsi/%esi/%si) with zero displacement.
996 bool isSrcIdx() const {
997 return !getMemIndexReg() && getMemScale() == 1 &&
998 (getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
999 getMemBaseReg() == X86::SI) && isa<MCConstantExpr>(getMemDisp()) &&
1000 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
1002 bool isSrcIdx8() const {
1003 return isMem8() && isSrcIdx();
1005 bool isSrcIdx16() const {
1006 return isMem16() && isSrcIdx();
1008 bool isSrcIdx32() const {
1009 return isMem32() && isSrcIdx();
1011 bool isSrcIdx64() const {
1012 return isMem64() && isSrcIdx();
// String-op destination operand: (%rdi/%edi/%di), segment ES (or none),
// zero displacement.
1015 bool isDstIdx() const {
1016 return !getMemIndexReg() && getMemScale() == 1 &&
1017 (getMemSegReg() == 0 || getMemSegReg() == X86::ES) &&
1018 (getMemBaseReg() == X86::RDI || getMemBaseReg() == X86::EDI ||
1019 getMemBaseReg() == X86::DI) && isa<MCConstantExpr>(getMemDisp()) &&
1020 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
1022 bool isDstIdx8() const {
1023 return isMem8() && isDstIdx();
1025 bool isDstIdx16() const {
1026 return isMem16() && isDstIdx();
1028 bool isDstIdx32() const {
1029 return isMem32() && isDstIdx();
1031 bool isDstIdx64() const {
1032 return isMem64() && isDstIdx();
// moffs forms (MOV with absolute offset): displacement only, sized.
1035 bool isMemOffs8() const {
1036 return Kind == Memory && !getMemBaseReg() &&
1037 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
1039 bool isMemOffs16() const {
1040 return Kind == Memory && !getMemBaseReg() &&
1041 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
1043 bool isMemOffs32() const {
1044 return Kind == Memory && !getMemBaseReg() &&
1045 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
1047 bool isMemOffs64() const {
1048 return Kind == Memory && !getMemBaseReg() &&
1049 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
1052 bool isReg() const { return Kind == Register; }
1054 bool isGR32orGR64() const {
1055 return Kind == Register &&
1056 (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
1057 X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
// Add an expression operand to Inst, folding constants to immediates.
1060 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
1061 // Add as immediates when possible.
1062 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
1063 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1065 Inst.addOperand(MCOperand::CreateExpr(Expr));
1068 void addRegOperands(MCInst &Inst, unsigned N) const {
1069 assert(N == 1 && "Invalid number of operands!");
1070 Inst.addOperand(MCOperand::CreateReg(getReg()));
// Map a 64-bit GPR to its 32-bit sub-register (RAX->EAX, R8->R8D, ...).
1073 static unsigned getGR32FromGR64(unsigned RegNo) {
1075 default: llvm_unreachable("Unexpected register");
1076 case X86::RAX: return X86::EAX;
1077 case X86::RCX: return X86::ECX;
1078 case X86::RDX: return X86::EDX;
1079 case X86::RBX: return X86::EBX;
1080 case X86::RBP: return X86::EBP;
1081 case X86::RSP: return X86::ESP;
1082 case X86::RSI: return X86::ESI;
1083 case X86::RDI: return X86::EDI;
1084 case X86::R8: return X86::R8D;
1085 case X86::R9: return X86::R9D;
1086 case X86::R10: return X86::R10D;
1087 case X86::R11: return X86::R11D;
1088 case X86::R12: return X86::R12D;
1089 case X86::R13: return X86::R13D;
1090 case X86::R14: return X86::R14D;
1091 case X86::R15: return X86::R15D;
1092 case X86::RIP: return X86::EIP;
1096 void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
1097 assert(N == 1 && "Invalid number of operands!");
1098 unsigned RegNo = getReg();
1099 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
1100 RegNo = getGR32FromGR64(RegNo);
1101 Inst.addOperand(MCOperand::CreateReg(RegNo));
1104 void addImmOperands(MCInst &Inst, unsigned N) const {
1105 assert(N == 1 && "Invalid number of operands!");
1106 addExpr(Inst, getImm());
// Full memory reference: the 5-operand base/scale/index/disp/segment form.
1109 void addMemOperands(MCInst &Inst, unsigned N) const {
1110 assert((N == 5) && "Invalid number of operands!");
1111 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1112 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
1113 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
1114 addExpr(Inst, getMemDisp());
1115 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1118 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
1119 assert((N == 1) && "Invalid number of operands!");
1120 // Add as immediates when possible.
1121 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1122 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1124 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1127 void addSrcIdxOperands(MCInst &Inst, unsigned N) const {
1128 assert((N == 2) && "Invalid number of operands!");
1129 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1130 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1132 void addDstIdxOperands(MCInst &Inst, unsigned N) const {
1133 assert((N == 1) && "Invalid number of operands!");
1134 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1137 void addMemOffsOperands(MCInst &Inst, unsigned N) const {
1138 assert((N == 2) && "Invalid number of operands!");
1139 // Add as immediates when possible.
1140 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1141 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1143 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1144 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Factory helpers; callers own the returned X86Operand (raw-pointer style
// matches the surrounding MC-parser code of this LLVM vintage).
1147 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
1148 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
1149 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
1150 Res->Tok.Data = Str.data();
1151 Res->Tok.Length = Str.size();
1155 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
1156 bool AddressOf = false,
1157 SMLoc OffsetOfLoc = SMLoc(),
1158 StringRef SymName = StringRef(),
1160 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
1161 Res->Reg.RegNo = RegNo;
1162 Res->AddressOf = AddressOf;
1163 Res->OffsetOfLoc = OffsetOfLoc;
1164 Res->SymName = SymName;
1165 Res->OpDecl = OpDecl;
1169 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
1170 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
1175 /// Create an absolute memory operand.
1176 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
1177 unsigned Size = 0, StringRef SymName = StringRef(),
1179 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1180 Res->Mem.SegReg = 0;
1181 Res->Mem.Disp = Disp;
1182 Res->Mem.BaseReg = 0;
1183 Res->Mem.IndexReg = 0;
1185 Res->Mem.Size = Size;
1186 Res->SymName = SymName;
1187 Res->OpDecl = OpDecl;
1188 Res->AddressOf = false;
1192 /// Create a generalized memory operand.
1193 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
1194 unsigned BaseReg, unsigned IndexReg,
1195 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
1197 StringRef SymName = StringRef(),
1199 // We should never just have a displacement, that should be parsed as an
1200 // absolute memory operand.
1201 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
1203 // The scale should always be one of {1,2,4,8}.
1204 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
1206 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1207 Res->Mem.SegReg = SegReg;
1208 Res->Mem.Disp = Disp;
1209 Res->Mem.BaseReg = BaseReg;
1210 Res->Mem.IndexReg = IndexReg;
1211 Res->Mem.Scale = Scale;
1212 Res->Mem.Size = Size;
1213 Res->SymName = SymName;
1214 Res->OpDecl = OpDecl;
1215 Res->AddressOf = false;
1220 } // end anonymous namespace.
// Validate that BaseReg and IndexReg are a legal combination for an x86
// addressing mode; on failure sets ErrMsg and (in the full source —
// the 'return true;' lines are elided from this listing) returns true.
1222 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
1223 StringRef &ErrMsg) {
1224 // If we have both a base register and an index register make sure they are
1225 // both 64-bit or 32-bit registers.
1226 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1227 if (BaseReg != 0 && IndexReg != 0) {
1228 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1229 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1230 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1231 IndexReg != X86::RIZ) {
1232 ErrMsg = "base register is 64-bit, but index register is not";
1235 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1236 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1237 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1238 IndexReg != X86::EIZ){
1239 ErrMsg = "base register is 32-bit, but index register is not";
1242 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1243 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1244 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1245 ErrMsg = "base register is 16-bit, but index register is not";
// 16-bit addressing only allows BX/BP paired with SI/DI.
1248 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
1249 IndexReg != X86::SI && IndexReg != X86::DI) ||
1250 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1251 IndexReg != X86::BX && IndexReg != X86::BP)) {
1252 ErrMsg = "invalid 16-bit base/index register combination";
// Check that the source and destination index registers of a string
// instruction have the same word size (SI/DI, ESI/EDI, RSI/RDI).
// (Fallback 'return true;' lines are elided from this listing.)
1260 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
1262 // Return true and let a normal complaint about bogus operands happen.
1263 if (!Op1.isMem() || !Op2.isMem())
1266 // Actually these might be the other way round if Intel syntax is
1267 // being used. It doesn't matter.
1268 unsigned diReg = Op1.Mem.BaseReg;
1269 unsigned siReg = Op2.Mem.BaseReg;
1271 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
1272 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
1273 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
1274 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
1275 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
1276 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
1277 // Again, return true and let another error happen.
// Parses a register reference in either AT&T (with optional '%' prefix) or
// Intel syntax. On success fills in RegNo/StartLoc/EndLoc and returns false;
// returns true on error (with a diagnostic in AT&T mode, silently in Intel
// mode so the caller can try other operand forms).
1281 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1282 SMLoc &StartLoc, SMLoc &EndLoc) {
1284 const AsmToken &PercentTok = Parser.getTok();
1285 StartLoc = PercentTok.getLoc();
1287 // If we encounter a %, ignore it. This code handles registers with and
1288 // without the prefix, unprefixed registers can occur in cfi directives.
1289 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1290 Parser.Lex(); // Eat percent token.
1292 const AsmToken &Tok = Parser.getTok();
1293 EndLoc = Tok.getEndLoc();
// A register name must be an identifier token. In Intel mode we fail
// silently (return true, no diagnostic) so the token can be re-parsed
// as something else.
1295 if (Tok.isNot(AsmToken::Identifier)) {
1296 if (isParsingIntelSyntax()) return true;
1297 return Error(StartLoc, "invalid register name",
1298 SMRange(StartLoc, EndLoc));
// First try the name as spelled, then lowercased (accepts e.g. "EAX").
1301 RegNo = MatchRegisterName(Tok.getString());
1303 // If the match failed, try the register name as lowercase.
1305 RegNo = MatchRegisterName(Tok.getString().lower());
// Reject 64-bit-only registers when not assembling for 64-bit mode.
1307 if (!is64BitMode()) {
1308 // FIXME: This should be done using Requires<Not64BitMode> and
1309 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1311 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1313 if (RegNo == X86::RIZ ||
1314 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1315 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1316 X86II::isX86_64ExtendedReg(RegNo))
1317 return Error(StartLoc, "register %"
1318 + Tok.getString() + " is only available in 64-bit mode",
1319 SMRange(StartLoc, EndLoc));
1322 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1323 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
1325 Parser.Lex(); // Eat 'st'
1327 // Check to see if we have '(4)' after %st.
1328 if (getLexer().isNot(AsmToken::LParen))
// Expect an integer stack index 0-7 inside the parentheses.
1333 const AsmToken &IntTok = Parser.getTok();
1334 if (IntTok.isNot(AsmToken::Integer))
1335 return Error(IntTok.getLoc(), "expected stack index");
1336 switch (IntTok.getIntVal()) {
1337 case 0: RegNo = X86::ST0; break;
1338 case 1: RegNo = X86::ST1; break;
1339 case 2: RegNo = X86::ST2; break;
1340 case 3: RegNo = X86::ST3; break;
1341 case 4: RegNo = X86::ST4; break;
1342 case 5: RegNo = X86::ST5; break;
1343 case 6: RegNo = X86::ST6; break;
1344 case 7: RegNo = X86::ST7; break;
1345 default: return Error(IntTok.getLoc(), "invalid stack index");
1348 if (getParser().Lex().isNot(AsmToken::RParen))
1349 return Error(Parser.getTok().getLoc(), "expected ')'");
1351 EndLoc = Parser.getTok().getEndLoc();
1352 Parser.Lex(); // Eat ')'
1356 EndLoc = Parser.getTok().getEndLoc();
1358 // If this is "db[0-7]", match it as an alias
// for the debug registers DR0-DR7.
1360 if (RegNo == 0 && Tok.getString().size() == 3 &&
1361 Tok.getString().startswith("db")) {
1362 switch (Tok.getString()[2]) {
1363 case '0': RegNo = X86::DR0; break;
1364 case '1': RegNo = X86::DR1; break;
1365 case '2': RegNo = X86::DR2; break;
1366 case '3': RegNo = X86::DR3; break;
1367 case '4': RegNo = X86::DR4; break;
1368 case '5': RegNo = X86::DR5; break;
1369 case '6': RegNo = X86::DR6; break;
1370 case '7': RegNo = X86::DR7; break;
1374 EndLoc = Parser.getTok().getEndLoc();
1375 Parser.Lex(); // Eat it.
// Still no match: silent failure in Intel mode, diagnostic in AT&T mode.
1381 if (isParsingIntelSyntax()) return true;
1382 return Error(StartLoc, "invalid register name",
1383 SMRange(StartLoc, EndLoc));
1386 Parser.Lex(); // Eat identifier token.
// Builds the implicit source memory operand for string instructions:
// (%rsi), (%esi) or (%si) depending on the current mode, with a zero
// displacement and no segment/index register.
1390 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1392 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1393 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1394 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1395 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds the implicit destination memory operand for string instructions:
// (%rdi), (%edi) or (%di) depending on the current mode, with a zero
// displacement and no segment/index register.
1398 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1400 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1401 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1402 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1403 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Dispatches operand parsing to the Intel- or AT&T-syntax parser based on
// the active assembler dialect.
1406 X86Operand *X86AsmParser::ParseOperand() {
1407 if (isParsingIntelSyntax())
1408 return ParseIntelOperand();
1409 return ParseATTOperand();
1412 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel-syntax size keyword (e.g. "DWORD" in "DWORD PTR [eax]") to
// its size in bits; OPAQUE maps to -1U (non-zero sentinel, value unused).
// The default (unrecognized keyword) case is outside the visible listing.
1413 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1414 unsigned Size = StringSwitch<unsigned>(OpStr)
1415 .Cases("BYTE", "byte", 8)
1416 .Cases("WORD", "word", 16)
1417 .Cases("DWORD", "dword", 32)
1418 .Cases("QWORD", "qword", 64)
1419 .Cases("XWORD", "xword", 80)
1420 .Cases("XMMWORD", "xmmword", 128)
1421 .Cases("YMMWORD", "ymmword", 256)
1422 .Cases("ZMMWORD", "zmmword", 512)
1423 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates a memory (or, for label-like symbols, register) operand for
// MS-style inline assembly, carrying the identifier/decl info the inline-asm
// rewriter needs.
1429 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1430 unsigned BaseReg, unsigned IndexReg,
1431 unsigned Scale, SMLoc Start, SMLoc End,
1432 unsigned Size, StringRef Identifier,
1433 InlineAsmIdentifierInfo &Info){
1434 if (isa<MCSymbolRefExpr>(Disp)) {
1435 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1436 // reference. We need an 'r' constraint here, so we need to create register
1437 // operand to ensure proper matching. Just pick a GPR based on the size of
1439 if (!Info.IsVarDecl) {
1441 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1442 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
1443 SMLoc(), Identifier, Info.OpDecl);
// Derive the operand size from the frontend's type info when available.
1446 Size = Info.Type * 8; // Size is in terms of bits in this context.
1448 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1453 // When parsing inline assembly we set the base register to a non-zero value
1454 // if we don't know the actual value at this time. This is necessary to
1455 // get the matching correct in some cases.
1456 BaseReg = BaseReg ? BaseReg : 1;
1457 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1458 End, Size, Identifier, Info.OpDecl);
// Records AsmRewrite fixups so the inline-asm string for an Intel bracketed
// expression (e.g. "ImmDisp [sym + disp]") is rewritten to just the symbol
// plus a single folded immediate displacement.
1462 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1463 StringRef SymName, int64_t ImmDisp,
1464 int64_t FinalImmDisp, SMLoc &BracLoc,
1465 SMLoc &StartInBrac, SMLoc &End) {
1466 // Remove the '[' and ']' from the IR string.
1467 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1468 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1470 // If ImmDisp is non-zero, then we parsed a displacement before the
1471 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1472 // If ImmDisp doesn't match the displacement computed by the state machine
1473 // then we have an additional displacement in the bracketed expression.
1474 if (ImmDisp != FinalImmDisp) {
1476 // We have an immediate displacement before the bracketed expression.
1477 // Adjust this to match the final immediate displacement.
// Find the existing rewrite for the pre-bracket immediate (it must lie
// before BracLoc) and convert it to a single AOK_Imm with the final value.
1479 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1480 E = AsmRewrites->end(); I != E; ++I) {
1481 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1483 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1484 assert (!Found && "ImmDisp already rewritten.");
1485 (*I).Kind = AOK_Imm;
1486 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1487 (*I).Val = FinalImmDisp;
1492 assert (Found && "Unable to rewrite ImmDisp.");
1495 // We have a symbolic and an immediate displacement, but no displacement
1496 // before the bracketed expression. Put the immediate displacement
1497 // before the bracketed expression.
1498 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1501 // Remove all the ImmPrefix rewrites within the brackets.
1502 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1503 E = AsmRewrites->end(); I != E; ++I) {
1504 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1506 if ((*I).Kind == AOK_ImmPrefix)
1507 (*I).Kind = AOK_Delete;
1509 const char *SymLocPtr = SymName.data();
1510 // Skip everything before the symbol.
// NOTE(review): the assert message says "non-negative" but the condition
// requires a strictly positive length; Len == 0 is handled by the if.
1511 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1512 assert(Len > 0 && "Expected a non-negative length.");
1513 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1515 // Skip everything after the symbol.
1516 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1517 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1518 assert(Len > 0 && "Expected a non-negative length.");
1519 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Tokenizes an Intel-syntax address/immediate expression and feeds each
// token into the IntelExprStateMachine SM. Returns true on error; on
// success End points past the consumed expression.
1523 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1524 const AsmToken &Tok = Parser.getTok();
// When true, the loop epilogue consumes the current token and updates End.
1528 bool UpdateLocLex = true;
1530 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1531 // identifier. Don't try to parse it as a register.
1532 if (Tok.getString().startswith("."))
1535 // If we're parsing an immediate expression, we don't expect a '['.
1536 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1539 switch (getLexer().getKind()) {
1541 if (SM.isValidEndState()) {
1545 return Error(Tok.getLoc(), "unknown token in expression")
1547 case AsmToken::EndOfStatement: {
1551 case AsmToken::Identifier: {
1552 // This could be a register or a symbolic displacement.
1555 SMLoc IdentLoc = Tok.getLoc();
1556 StringRef Identifier = Tok.getString();
// Try a register first; ParseRegister fails silently in Intel mode.
1557 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1558 SM.onRegister(TmpReg);
1559 UpdateLocLex = false;
// Not a register: parse as a symbol. Outside inline asm we use the
// generic primary-expression parser; inside inline asm the frontend
// resolves the identifier for us.
1562 if (!isParsingInlineAsm()) {
1563 if (getParser().parsePrimaryExpr(Val, End))
1564 return Error(Tok.getLoc(), "Unexpected identifier!");
1566 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1567 if (ParseIntelIdentifier(Val, Identifier, Info,
1568 /*Unevaluated=*/false, End))
1571 SM.onIdentifierExpr(Val, Identifier);
1572 UpdateLocLex = false;
1575 return Error(Tok.getLoc(), "Unexpected identifier!");
1577 case AsmToken::Integer: {
1579 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1580 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1582 // Look for 'b' or 'f' following an Integer as a directional label
// (e.g. "1b" / "1f" referencing a local numeric label).
1583 SMLoc Loc = getTok().getLoc();
1584 int64_t IntVal = getTok().getIntVal();
1585 End = consumeToken();
1586 UpdateLocLex = false;
1587 if (getLexer().getKind() == AsmToken::Identifier) {
1588 StringRef IDVal = getTok().getString();
1589 if (IDVal == "f" || IDVal == "b") {
1591 getContext().GetDirectionalLocalSymbol(IntVal,
1592 IDVal == "f" ? 1 : 0)
1593 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1595 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name an already-defined label.
1596 if (IDVal == "b" && Sym->isUndefined())
1597 return Error(Loc, "invalid reference to undefined symbol");
1598 StringRef Identifier = Sym->getName();
1599 SM.onIdentifierExpr(Val, Identifier);
1600 End = consumeToken();
// Identifier follows but is not 'f'/'b': treat as plain integer.
1602 if (SM.onInteger(IntVal, ErrMsg))
1603 return Error(Loc, ErrMsg);
1606 if (SM.onInteger(IntVal, ErrMsg))
1607 return Error(Loc, ErrMsg);
// Operators and brackets are forwarded directly to the state machine.
1611 case AsmToken::Plus: SM.onPlus(); break;
1612 case AsmToken::Minus: SM.onMinus(); break;
1613 case AsmToken::Star: SM.onStar(); break;
1614 case AsmToken::Slash: SM.onDivide(); break;
1615 case AsmToken::Pipe: SM.onOr(); break;
1616 case AsmToken::Amp: SM.onAnd(); break;
1617 case AsmToken::LessLess:
1618 SM.onLShift(); break;
1619 case AsmToken::GreaterGreater:
1620 SM.onRShift(); break;
1621 case AsmToken::LBrac: SM.onLBrac(); break;
1622 case AsmToken::RBrac: SM.onRBrac(); break;
1623 case AsmToken::LParen: SM.onLParen(); break;
1624 case AsmToken::RParen: SM.onRParen(); break;
1627 return Error(Tok.getLoc(), "unknown token in expression");
1629 if (!Done && UpdateLocLex)
1630 End = consumeToken();
// Parses an Intel bracketed memory expression such as
// [ BaseReg + Scale*IndexReg + Disp ] (possibly preceded by an immediate
// displacement ImmDisp), returning the resulting memory operand or 0 on
// error.
1635 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1638 const AsmToken &Tok = Parser.getTok();
1639 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1640 if (getLexer().isNot(AsmToken::LBrac))
1641 return ErrorOperand(BracLoc, "Expected '[' token!");
1642 Parser.Lex(); // Eat '['
1644 SMLoc StartInBrac = Tok.getLoc();
1645 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1646 // may have already parsed an immediate displacement before the bracketed
1648 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1649 if (ParseIntelExpression(SM, End))
// Use the symbolic displacement if one was parsed, otherwise fold the
// accumulated immediate into a constant displacement.
1653 if (const MCExpr *Sym = SM.getSym()) {
1654 // A symbolic displacement.
1656 if (isParsingInlineAsm())
1657 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1658 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1661 // An immediate displacement only.
1662 Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1665 // Parse the dot operator (e.g., [ebx].foo.bar).
1666 if (Tok.getString().startswith(".")) {
1667 const MCExpr *NewDisp;
1668 if (ParseIntelDotOperator(Disp, NewDisp))
1671 End = Tok.getEndLoc();
1672 Parser.Lex(); // Eat the field.
1676 int BaseReg = SM.getBaseReg();
1677 int IndexReg = SM.getIndexReg();
1678 int Scale = SM.getScale();
1679 if (!isParsingInlineAsm()) {
// Plain displacement (no registers): emit an absolute memory reference.
1681 if (!BaseReg && !IndexReg) {
1683 return X86Operand::CreateMem(Disp, Start, End, Size);
1685 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
// Validate the base/index register combination before emitting.
1688 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1689 Error(StartInBrac, ErrMsg);
1692 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline-asm path: attach identifier info for the rewriter.
1696 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1697 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1698 End, Size, SM.getSymName(), Info);
1701 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolves an identifier in MS inline assembly via the frontend callback,
// advances the lexer over the (possibly multi-token, namespace-qualified)
// identifier, and produces a symbol reference in Val. Returns false on
// success.
1702 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1703 StringRef &Identifier,
1704 InlineAsmIdentifierInfo &Info,
1705 bool IsUnevaluatedOperand, SMLoc &End) {
1706 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Let the frontend (Sema) decide how much of the line is the identifier
// and fill in Info; LineBuf is trimmed to that length.
1709 StringRef LineBuf(Identifier.data());
1710 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1712 const AsmToken &Tok = Parser.getTok();
1714 // Advance the token stream until the end of the current token is
1715 // after the end of what the frontend claimed.
1716 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1718 End = Tok.getEndLoc();
1721 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1722 if (End.getPointer() == EndPtr) break;
1725 // Create the symbol reference.
1726 Identifier = LineBuf;
1727 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1728 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1729 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1733 /// \brief Parse intel style segment override.
// Parses the operand following "SegReg:" in Intel syntax: either an
// immediate displacement, a bracketed expression, or a symbolic reference.
1734 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1737 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1738 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1739 if (Tok.isNot(AsmToken::Colon))
1740 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1741 Parser.Lex(); // Eat ':'
1743 int64_t ImmDisp = 0;
1744 if (getLexer().is(AsmToken::Integer)) {
1745 ImmDisp = Tok.getIntVal();
1746 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1748 if (isParsingInlineAsm())
1749 InstInfo->AsmRewrites->push_back(
1750 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1752 if (getLexer().isNot(AsmToken::LBrac)) {
1753 // An immediate following a 'segment register', 'colon' token sequence can
1754 // be followed by a bracketed expression. If it isn't we know we have our
1755 // final segment override.
1756 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1757 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1758 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed expression: hand off (ImmDisp may still be pending).
1763 if (getLexer().is(AsmToken::LBrac))
1764 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Symbolic operand: generic expression parsing outside inline asm,
// frontend-resolved identifier inside inline asm.
1768 if (!isParsingInlineAsm()) {
1769 if (getParser().parsePrimaryExpr(Val, End))
1770 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1772 return X86Operand::CreateMem(Val, Start, End, Size);
1775 InlineAsmIdentifierInfo Info;
1776 StringRef Identifier = Tok.getString();
1777 if (ParseIntelIdentifier(Val, Identifier, Info,
1778 /*Unevaluated=*/false, End))
1780 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1781 /*Scale=*/1, Start, End, Size, Identifier, Info);
1784 /// ParseIntelMemOperand - Parse intel style memory operand.
// Parses an Intel memory operand with no segment override: either a
// bracketed expression (possibly preceded by ImmDisp) or a symbolic
// reference.
1785 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1787 const AsmToken &Tok = Parser.getTok();
1790 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1791 if (getLexer().is(AsmToken::LBrac))
1792 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Symbolic operand: generic expression parsing outside inline asm,
// frontend-resolved identifier inside inline asm.
1795 if (!isParsingInlineAsm()) {
1796 if (getParser().parsePrimaryExpr(Val, End))
1797 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1799 return X86Operand::CreateMem(Val, Start, End, Size);
1802 InlineAsmIdentifierInfo Info;
1803 StringRef Identifier = Tok.getString();
1804 if (ParseIntelIdentifier(Val, Identifier, Info,
1805 /*Unevaluated=*/false, End))
1807 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1808 /*Scale=*/1, Start, End, Size, Identifier, Info);
1811 /// Parse the '.' operator.
// Parses the Intel '.' field-access operator (e.g. [ebx].foo.bar), folding
// the field offset into the existing constant displacement Disp and
// returning the sum in NewDisp. Returns true on error.
1812 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1813 const MCExpr *&NewDisp) {
1814 const AsmToken &Tok = Parser.getTok();
1815 int64_t OrigDispVal, DotDispVal;
1817 // FIXME: Handle non-constant expressions.
1818 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1819 OrigDispVal = OrigDisp->getValue();
1821 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
// Drop the leading '.' from the lexed token to get the field spelling.
1824 StringRef DotDispStr = Tok.getString().drop_front(1);
1826 // .Imm gets lexed as a real.
1827 if (Tok.is(AsmToken::Real)) {
1829 DotDispStr.getAsInteger(10, DotDisp);
1830 DotDispVal = DotDisp.getZExtValue();
1831 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// "base.member": ask the frontend for the member's byte offset.
1833 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1834 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1836 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1837 DotDispVal = DotDisp;
1839 return Error(Tok.getLoc(), "Unexpected token type!");
// In inline asm, rewrite the textual ".field" into the computed offset.
1841 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1842 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1843 unsigned Len = DotDispStr.size();
1844 unsigned Val = OrigDispVal + DotDispVal;
1845 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1849 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1853 /// Parse the 'offset' operator. This operator is used to specify the
1854 /// location rather than the content of a variable.
1855 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1856 const AsmToken &Tok = Parser.getTok();
1857 SMLoc OffsetOfLoc = Tok.getLoc();
1858 Parser.Lex(); // Eat offset.
1861 InlineAsmIdentifierInfo Info;
1862 SMLoc Start = Tok.getLoc(), End;
1863 StringRef Identifier = Tok.getString();
1864 if (ParseIntelIdentifier(Val, Identifier, Info,
1865 /*Unevaluated=*/false, End))
1868 // Don't emit the offset operator.
// (Skip length 7 covers the text "offset " including the trailing space.)
1869 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1871 // The offset operator will have an 'r' constraint, thus we need to create
1872 // register operand to ensure proper matching. Just pick a GPR based on
1873 // the size of a pointer.
1875 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1876 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1877 OffsetOfLoc, Identifier, Info.OpDecl);
1880 enum IntelOperatorKind {
1886 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1887 /// returns the number of elements in an array. It returns the value 1 for
1888 /// non-array variables. The SIZE operator returns the size of a C or C++
1889 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1890 /// TYPE operator returns the size of a C or C++ type or variable. If the
1891 /// variable is an array, TYPE returns the size of a single element.
1892 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1893 const AsmToken &Tok = Parser.getTok();
1894 SMLoc TypeLoc = Tok.getLoc();
1895 Parser.Lex(); // Eat operator.
1897 const MCExpr *Val = 0;
1898 InlineAsmIdentifierInfo Info;
1899 SMLoc Start = Tok.getLoc(), End;
1900 StringRef Identifier = Tok.getString();
// The operand of LENGTH/SIZE/TYPE is looked up but never evaluated.
1901 if (ParseIntelIdentifier(Val, Identifier, Info,
1902 /*Unevaluated=*/true, End))
1906 return ErrorOperand(Start, "unable to lookup expression");
// Select the constant the operator evaluates to from the frontend info.
1910 default: llvm_unreachable("Unexpected operand kind!");
1911 case IOK_LENGTH: CVal = Info.Length; break;
1912 case IOK_SIZE: CVal = Info.Size; break;
1913 case IOK_TYPE: CVal = Info.Type; break;
1916 // Rewrite the type operator and the C or C++ type or variable in terms of an
1917 // immediate. E.g. TYPE foo -> $$4
1918 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1919 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1921 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1922 return X86Operand::CreateImm(Imm, Start, End);
// Top-level Intel-syntax operand parser: handles inline-asm operators
// (OFFSET/LENGTH/SIZE/TYPE), an optional "<size> PTR" prefix, immediates,
// registers, segment overrides and memory operands.
1925 X86Operand *X86AsmParser::ParseIntelOperand() {
1926 const AsmToken &Tok = Parser.getTok();
1929 // Offset, length, type and size operators.
1930 if (isParsingInlineAsm()) {
1931 StringRef AsmTokStr = Tok.getString();
1932 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1933 return ParseIntelOffsetOfOperator();
1934 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1935 return ParseIntelOperator(IOK_LENGTH);
1936 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1937 return ParseIntelOperator(IOK_SIZE);
1938 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1939 return ParseIntelOperator(IOK_TYPE);
// Optional "<size> PTR" prefix fixes the memory operand width in bits.
1942 unsigned Size = getIntelMemOperandSize(Tok.getString());
1944 Parser.Lex(); // Eat operand size (e.g., byte, word).
1945 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1946 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1947 Parser.Lex(); // Eat ptr.
1949 Start = Tok.getLoc();
// Immediate or displacement expression (may also be a directional label).
1952 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1953 getLexer().is(AsmToken::LParen)) {
1954 AsmToken StartTok = Tok;
1955 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1956 /*AddImmPrefix=*/false);
1957 if (ParseIntelExpression(SM, End))
1960 int64_t Imm = SM.getImm();
1961 if (isParsingInlineAsm()) {
1962 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1963 if (StartTok.getString().size() == Len)
1964 // Just add a prefix if this wasn't a complex immediate expression.
1965 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1967 // Otherwise, rewrite the complex expression as a single immediate.
1968 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1971 if (getLexer().isNot(AsmToken::LBrac)) {
1972 // If a directional label (ie. 1f or 2b) was parsed above from
1973 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1974 // to the MCExpr with the directional local symbol and this is a
1975 // memory operand not an immediate operand.
1977 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1979 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1980 return X86Operand::CreateImm(ImmExpr, Start, End);
1983 // Only positive immediates are valid.
1985 return ErrorOperand(Start, "expected a positive immediate displacement "
1986 "before bracketed expr.");
1988 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1989 return ParseIntelMemOperand(Imm, Start, Size);
// Register, possibly starting a segment override ("es:[...]").
1994 if (!ParseRegister(RegNo, Start, End)) {
1995 // If this is a segment register followed by a ':', then this is the start
1996 // of a segment override, otherwise this is a normal register reference.
1997 if (getLexer().isNot(AsmToken::Colon))
1998 return X86Operand::CreateReg(RegNo, Start, End);
2000 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Fallback: memory operand with no leading displacement.
2004 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Top-level AT&T-syntax operand parser: '%' starts a register (or segment
// override), '$' starts an immediate, anything else is a memory operand.
2007 X86Operand *X86AsmParser::ParseATTOperand() {
2008 switch (getLexer().getKind()) {
2010 // Parse a memory operand with no segment register.
2011 return ParseMemOperand(0, Parser.getTok().getLoc());
2012 case AsmToken::Percent: {
2013 // Read the register.
2016 if (ParseRegister(RegNo, Start, End)) return 0;
// %eiz/%riz are pseudo index registers and invalid standalone.
2017 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
2018 Error(Start, "%eiz and %riz can only be used as index registers",
2019 SMRange(Start, End));
2023 // If this is a segment register followed by a ':', then this is the start
2024 // of a memory reference, otherwise this is a normal register reference.
2025 if (getLexer().isNot(AsmToken::Colon))
2026 return X86Operand::CreateReg(RegNo, Start, End);
2028 getParser().Lex(); // Eat the colon.
2029 return ParseMemOperand(RegNo, Start);
2031 case AsmToken::Dollar: {
2032 // $42 -> immediate.
2033 SMLoc Start = Parser.getTok().getLoc(), End;
2036 if (getParser().parseExpression(Val, End))
2038 return X86Operand::CreateImm(Val, Start, End);
// Parses optional AVX-512 operand decorations after a regular operand:
// memory broadcast {1toN}, write-mask {%kN}, and zeroing {z}. Appends the
// recognized pieces to Operands. Returns false on success (note the
// '!ErrorAndEatStatement' idiom inverts the error helper's result).
2043 bool X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2044 const MCParsedAsmOperand &Op) {
// Only meaningful when the AVX-512 feature is enabled.
2045 if(STI.getFeatureBits() & X86::FeatureAVX512) {
2046 if (getLexer().is(AsmToken::LCurly)) {
2047 // Eat "{" and mark the current place.
2048 const SMLoc consumedToken = consumeToken();
2049 // Distinguish {1to<NUM>} from {%k<NUM>}.
2050 if(getLexer().is(AsmToken::Integer)) {
2051 // Parse memory broadcasting ({1to<NUM>}).
2052 if (getLexer().getTok().getIntVal() != 1)
2053 return !ErrorAndEatStatement(getLexer().getLoc(),
2054 "Expected 1to<NUM> at this point");
2055 Parser.Lex(); // Eat "1" of 1to8
2056 if (!getLexer().is(AsmToken::Identifier) ||
2057 !getLexer().getTok().getIdentifier().startswith("to"))
2058 return !ErrorAndEatStatement(getLexer().getLoc(),
2059 "Expected 1to<NUM> at this point");
2060 // Recognize only reasonable suffixes.
2061 const char *BroadcastPrimitive =
2062 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2063 .Case("to8", "{1to8}")
2064 .Case("to16", "{1to16}")
2066 if (!BroadcastPrimitive)
2067 return !ErrorAndEatStatement(getLexer().getLoc(),
2068 "Invalid memory broadcast primitive.");
2069 Parser.Lex(); // Eat "toN" of 1toN
2070 if (!getLexer().is(AsmToken::RCurly))
2071 return !ErrorAndEatStatement(getLexer().getLoc(),
2072 "Expected } at this point");
2073 Parser.Lex(); // Eat "}"
2074 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2076 // No AVX512 specific primitives can pass
2077 // after memory broadcasting, so return.
2080 // Parse mask register {%k1}
2081 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
2082 if (X86Operand *Op = ParseOperand()) {
2083 Operands.push_back(Op);
2084 if (!getLexer().is(AsmToken::RCurly))
2085 return !ErrorAndEatStatement(getLexer().getLoc(),
2086 "Expected } at this point");
2087 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2089 // Parse "zeroing non-masked" semantic {z}
2090 if (getLexer().is(AsmToken::LCurly)) {
2091 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
2092 if (!getLexer().is(AsmToken::Identifier) ||
2093 getLexer().getTok().getIdentifier() != "z")
2094 return !ErrorAndEatStatement(getLexer().getLoc(),
2095 "Expected z at this point");
2096 Parser.Lex(); // Eat the z
2097 if (!getLexer().is(AsmToken::RCurly))
2098 return !ErrorAndEatStatement(getLexer().getLoc(),
2099 "Expected } at this point");
2100 Parser.Lex(); // Eat the }
2109 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2110 /// has already been parsed if present.
2111 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
2113 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2114 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2115 // only way to do this without lookahead is to eat the '(' and see what is
// Displacement defaults to 0 when none is written.
2117 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
2118 if (getLexer().isNot(AsmToken::LParen)) {
2120 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
2122 // After parsing the base expression we could either have a parenthesized
2123 // memory address or not. If not, return now. If so, eat the (.
2124 if (getLexer().isNot(AsmToken::LParen)) {
2125 // Unless we have a segment register, treat this as an immediate.
2127 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
2128 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
2134 // Okay, we have a '('. We don't know if this is an expression or not, but
2135 // so we have to eat the ( to see beyond it.
2136 SMLoc LParenLoc = Parser.getTok().getLoc();
2137 Parser.Lex(); // Eat the '('.
2139 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2140 // Nothing to do here, fall into the code below with the '(' part of the
2141 // memory operand consumed.
2145 // It must be an parenthesized expression, parse it now.
2146 if (getParser().parseParenExpression(Disp, ExprEnd))
2149 // After parsing the base expression we could either have a parenthesized
2150 // memory address or not. If not, return now. If so, eat the (.
2151 if (getLexer().isNot(AsmToken::LParen)) {
2152 // Unless we have a segment register, treat this as an immediate.
2154 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
2155 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
2163 // If we reached here, then we just ate the ( of the memory operand. Process
2164 // the rest of the memory operand.
2165 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2166 SMLoc IndexLoc, BaseLoc;
2168 if (getLexer().is(AsmToken::Percent)) {
2169 SMLoc StartLoc, EndLoc;
2170 BaseLoc = Parser.getTok().getLoc();
2171 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
2172 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2173 Error(StartLoc, "eiz and riz can only be used as index registers",
2174 SMRange(StartLoc, EndLoc));
2179 if (getLexer().is(AsmToken::Comma)) {
2180 Parser.Lex(); // Eat the comma.
2181 IndexLoc = Parser.getTok().getLoc();
2183 // Following the comma we should have either an index register, or a scale
2184 // value. We don't support the later form, but we want to parse it
2187 // Note that even though it would be completely consistent to support syntax
2188 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2189 if (getLexer().is(AsmToken::Percent)) {
2191 if (ParseRegister(IndexReg, L, L)) return 0;
2193 if (getLexer().isNot(AsmToken::RParen)) {
2194 // Parse the scale amount:
2195 // ::= ',' [scale-expression]
2196 if (getLexer().isNot(AsmToken::Comma)) {
2197 Error(Parser.getTok().getLoc(),
2198 "expected comma in scale expression");
2201 Parser.Lex(); // Eat the comma.
2203 if (getLexer().isNot(AsmToken::RParen)) {
2204 SMLoc Loc = Parser.getTok().getLoc();
2207 if (getParser().parseAbsoluteExpression(ScaleVal)){
2208 Error(Loc, "expected scale expression");
2212 // Validate the scale amount.
// 16-bit addressing has no SIB byte, so only scale 1 is encodable.
2213 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2215 Error(Loc, "scale factor in 16-bit address must be 1");
2218 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
2219 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2222 Scale = (unsigned)ScaleVal;
2225 } else if (getLexer().isNot(AsmToken::RParen)) {
2226 // A scale amount without an index is ignored.
2228 SMLoc Loc = Parser.getTok().getLoc();
2231 if (getParser().parseAbsoluteExpression(Value))
2235 Warning(Loc, "scale factor without index register is ignored");
2240 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2241 if (getLexer().isNot(AsmToken::RParen)) {
2242 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2245 SMLoc MemEnd = Parser.getTok().getEndLoc();
2246 Parser.Lex(); // Eat the ')'.
2248 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2249 // and then only in non-64-bit modes. Except for DX, which is a special case
2250 // because an unofficial form of in/out instructions uses it.
2251 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2252 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2253 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2254 BaseReg != X86::DX) {
2255 Error(BaseLoc, "invalid 16-bit base register");
2259 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2260 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final base/index combination check shared with the Intel parser.
2265 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2266 Error(BaseLoc, ErrMsg);
2270 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// ParseInstruction - Parse one instruction statement. First patches the
// mnemonic for table-matching hacks (setCC trailing-"b" forms, cmp/vcmp
// pseudo comparison-code suffixes), then detects prefix mnemonics, parses
// the operand list, and finally applies a series of mnemonic-specific
// operand canonicalizations (unofficial in/out %dx memory forms, default
// operands for the string instructions, "$1" shift canonicalization, and
// "int $3" -> "int3").
// NOTE(review): interior lines of this function are elided in this chunk;
// only the visible lines are documented/edited here.
2275 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
2276 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
2278 StringRef PatchedName = Name;
2280 // FIXME: Hack to recognize setneb as setne.
2281 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2282 PatchedName != "setb" && PatchedName != "setnb")
2283 PatchedName = PatchedName.substr(0, Name.size()-1);
2285 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2286 const MCExpr *ExtraImmOp = 0;
2287 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2288 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2289 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2290 bool IsVCMP = PatchedName[0] == 'v';
2291 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the textual comparison code to the immediate predicate value used by
// the cmp{ss,sd,ps,pd} encodings (codes >= 0x08 are AVX-only).
2292 unsigned SSEComparisonCode = StringSwitch<unsigned>(
2293 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
2297 .Case("unord", 0x03)
2302 /* AVX only from here */
2303 .Case("eq_uq", 0x08)
2306 .Case("false", 0x0B)
2307 .Case("neq_oq", 0x0C)
2311 .Case("eq_os", 0x10)
2312 .Case("lt_oq", 0x11)
2313 .Case("le_oq", 0x12)
2314 .Case("unord_s", 0x13)
2315 .Case("neq_us", 0x14)
2316 .Case("nlt_uq", 0x15)
2317 .Case("nle_uq", 0x16)
2318 .Case("ord_s", 0x17)
2319 .Case("eq_us", 0x18)
2320 .Case("nge_uq", 0x19)
2321 .Case("ngt_uq", 0x1A)
2322 .Case("false_os", 0x1B)
2323 .Case("neq_os", 0x1C)
2324 .Case("ge_oq", 0x1D)
2325 .Case("gt_oq", 0x1E)
2326 .Case("true_us", 0x1F)
// Only accept AVX-only codes when the mnemonic is the "v" form.
2328 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
2329 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2330 getParser().getContext());
2331 if (PatchedName.endswith("ss")) {
2332 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2333 } else if (PatchedName.endswith("sd")) {
2334 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2335 } else if (PatchedName.endswith("ps")) {
2336 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2338 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2339 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2344 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate comes first.
2346 if (ExtraImmOp && !isParsingIntelSyntax())
2347 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2349 // Determine whether this is an instruction prefix.
2351 Name == "lock" || Name == "rep" ||
2352 Name == "repe" || Name == "repz" ||
2353 Name == "repne" || Name == "repnz" ||
2354 Name == "rex64" || Name == "data16";
2357 // This does the actual operand parsing. Don't parse any more if we have a
2358 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2359 // just want to parse the "lock" as the first instruction and the "incl" as
2361 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2363 // Parse '*' modifier.
2364 if (getLexer().is(AsmToken::Star))
2365 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2367 // Read the operands.
2369 if (X86Operand *Op = ParseOperand()) {
2370 Operands.push_back(Op);
2371 if (!HandleAVX512Operand(Operands, *Op))
2374 Parser.eatToEndOfStatement();
2377 // check for comma and eat it
2378 if (getLexer().is(AsmToken::Comma))
2384 if (getLexer().isNot(AsmToken::EndOfStatement))
2385 return ErrorAndEatStatement(getLexer().getLoc(),
2385 "unexpected token in argument list");
2388 // Consume the EndOfStatement or the prefix separator Slash
// Parenthesized to make the intended "|| ( && )" grouping explicit; this is
// what the unparenthesized expression already meant by C++ precedence, and
// it silences -Wlogical-op-parentheses.
2389 if (getLexer().is(AsmToken::EndOfStatement) ||
2389 (isPrefix && getLexer().is(AsmToken::Slash)))
// In Intel syntax the comparison-code immediate comes last.
2392 if (ExtraImmOp && isParsingIntelSyntax())
2393 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2395 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2396 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2397 // documented form in various unofficial manuals, so a lot of code uses it.
2398 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2399 Operands.size() == 3) {
2400 X86Operand &Op = *(X86Operand*)Operands.back();
2401 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2402 isa<MCConstantExpr>(Op.Mem.Disp) &&
2403 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2404 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2405 SMLoc Loc = Op.getEndLoc();
2406 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2410 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2411 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2412 Operands.size() == 3) {
2413 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2414 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2415 isa<MCConstantExpr>(Op.Mem.Disp) &&
2416 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2417 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2418 SMLoc Loc = Op.getEndLoc();
2419 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2424 // Append default arguments to "ins[bwld]"
2425 if (Name.startswith("ins") && Operands.size() == 1 &&
2426 (Name == "insb" || Name == "insw" || Name == "insl" ||
2428 if (isParsingIntelSyntax()) {
2429 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2430 Operands.push_back(DefaultMemDIOperand(NameLoc));
2432 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2433 Operands.push_back(DefaultMemDIOperand(NameLoc));
2437 // Append default arguments to "outs[bwld]"
2438 if (Name.startswith("outs") && Operands.size() == 1 &&
2439 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2440 Name == "outsd" )) {
2441 if (isParsingIntelSyntax()) {
2442 Operands.push_back(DefaultMemSIOperand(NameLoc));
2443 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2445 Operands.push_back(DefaultMemSIOperand(NameLoc));
2446 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2450 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2451 // values of $SIREG according to the mode. It would be nice if this
2452 // could be achieved with InstAlias in the tables.
2453 if (Name.startswith("lods") && Operands.size() == 1 &&
2454 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2455 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2456 Operands.push_back(DefaultMemSIOperand(NameLoc));
2458 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2459 // values of $DIREG according to the mode. It would be nice if this
2460 // could be achieved with InstAlias in the tables.
2461 if (Name.startswith("stos") && Operands.size() == 1 &&
2462 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2463 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2464 Operands.push_back(DefaultMemDIOperand(NameLoc))
2466 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2467 // values of $DIREG according to the mode. It would be nice if this
2468 // could be achieved with InstAlias in the tables.
2469 if (Name.startswith("scas") && Operands.size() == 1 &&
2470 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2471 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2472 Operands.push_back(DefaultMemDIOperand(NameLoc));
2474 // Add default SI and DI operands to "cmps[bwlq]".
2475 if (Name.startswith("cmps") &&
2476 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2477 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2478 if (Operands.size() == 1) {
2479 if (isParsingIntelSyntax()) {
2480 Operands.push_back(DefaultMemSIOperand(NameLoc));
2481 Operands.push_back(DefaultMemDIOperand(NameLoc));
2483 Operands.push_back(DefaultMemDIOperand(NameLoc));
2484 Operands.push_back(DefaultMemSIOperand(NameLoc));
2486 } else if (Operands.size() == 3) {
2487 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2488 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2489 if (!doSrcDstMatch(Op, Op2))
2490 return Error(Op.getStartLoc(),
2491 "mismatching source and destination index registers");
2495 // Add default SI and DI operands to "movs[bwlq]".
2496 if ((Name.startswith("movs") &&
2497 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2498 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2499 (Name.startswith("smov") &&
2500 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2501 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2502 if (Operands.size() == 1) {
// "movsd" with no operands is the string move, not the SSE2 scalar move.
2503 if (Name == "movsd")
2504 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2505 if (isParsingIntelSyntax()) {
2506 Operands.push_back(DefaultMemDIOperand(NameLoc));
2507 Operands.push_back(DefaultMemSIOperand(NameLoc));
2509 Operands.push_back(DefaultMemSIOperand(NameLoc));
2510 Operands.push_back(DefaultMemDIOperand(NameLoc));
2512 } else if (Operands.size() == 3) {
2513 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2514 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2515 if (!doSrcDstMatch(Op, Op2))
2516 return Error(Op.getStartLoc(),
2517 "mismatching source and destination index registers");
2521 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2523 if ((Name.startswith("shr") || Name.startswith("sar") ||
2524 Name.startswith("shl") || Name.startswith("sal") ||
2525 Name.startswith("rcl") || Name.startswith("rcr") ||
2526 Name.startswith("rol") || Name.startswith("ror")) &&
2527 Operands.size() == 3) {
// The constant-1 operand position differs between the two syntaxes.
2528 if (isParsingIntelSyntax()) {
2530 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2531 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2532 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2534 Operands.pop_back();
2537 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2538 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2539 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2541 Operands.erase(Operands.begin() + 1);
2546 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2547 // instalias with an immediate operand yet.
2548 if (Name == "int" && Operands.size() == 2) {
2549 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2550 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2551 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2553 Operands.erase(Operands.begin() + 1);
2554 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Rewrite an accumulator-form instruction (imm-only operand) into the
// corresponding reg/imm8 form: destination and source register operands are
// both Reg, followed by the original immediate.
// NOTE(review): the tail of this helper (remaining operands / isCmp
// handling and the final assignment back into Inst) is elided in this chunk.
2561 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2564 TmpInst.setOpcode(Opcode);
2566 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2567 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2568 TmpInst.addOperand(Inst.getOperand(0));
// If the 16-bit AX-form immediate fits in a sign-extended 8-bit value,
// rewrite the instruction to the smaller 16ri8 encoding via convertToSExti8;
// otherwise leave the instruction untouched.
2573 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2574 bool isCmp = false) {
2575 if (!Inst.getOperand(0).isImm() ||
2576 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2579 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit (EAX-form) analogue of convert16i16to16ri8: shrink to the 32ri8
// encoding when the immediate is sign-extendable from 8 bits.
2582 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2583 bool isCmp = false) {
2584 if (!Inst.getOperand(0).isImm() ||
2585 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2588 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit (RAX-form) analogue of convert16i16to16ri8: shrink to the 64ri8
// encoding when the immediate is sign-extendable from 8 bits.
2591 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2592 bool isCmp = false) {
2593 if (!Inst.getOperand(0).isImm() ||
2594 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2597 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// processInstruction - Post-match instruction tweaks. Returns true when the
// instruction was changed (the caller loops until no change), false when no
// transformation applies.
2601 processInstruction(MCInst &Inst,
2602 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2603 switch (Inst.getOpcode()) {
2604 default: return false;
// ALU accumulator-immediate forms: try to shrink to the imm8 encodings.
2605 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2606 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2607 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2608 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2609 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2610 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2611 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2612 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2613 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2614 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2615 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2616 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2617 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2618 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2619 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2620 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2621 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2622 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2623 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2624 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2625 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2626 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2627 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2628 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// VEX vector moves: when the source is an extended (R8-R15/XMM8+) register
// and the destination is not, switch to the _REV opcode so the extended
// register lands in the ModRM r/m field and encodes without trouble.
2629 case X86::VMOVAPDrr:
2630 case X86::VMOVAPDYrr:
2631 case X86::VMOVAPSrr:
2632 case X86::VMOVAPSYrr:
2633 case X86::VMOVDQArr:
2634 case X86::VMOVDQAYrr:
2635 case X86::VMOVDQUrr:
2636 case X86::VMOVDQUYrr:
2637 case X86::VMOVUPDrr:
2638 case X86::VMOVUPDYrr:
2639 case X86::VMOVUPSrr:
2640 case X86::VMOVUPSYrr: {
2641 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2642 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2646 switch (Inst.getOpcode()) {
2647 default: llvm_unreachable("Invalid opcode");
2648 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2649 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2650 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2651 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2652 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2653 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2654 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2655 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2656 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2657 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2658 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2659 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2661 Inst.setOpcode(NewOpc);
// NOTE(review): the inner switch below also handles VMOVSDrr, so an elided
// `case X86::VMOVSDrr:` label presumably precedes this one — confirm
// against the full file.
2665 case X86::VMOVSSrr: {
// Scalar moves check operand 2 (they have a merge source in operand 1).
2666 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2667 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2670 switch (Inst.getOpcode()) {
2671 default: llvm_unreachable("Invalid opcode");
2672 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2673 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2675 Inst.setOpcode(NewOpc);
2681 static const char *getSubtargetFeatureName(unsigned Val);
// MatchAndEmitInstruction - Match the parsed operands against the generated
// matcher and emit the instruction. On direct-match failure, retries with
// each size suffix appended to the mnemonic (b/w/l/q, or s/l/t for x87) and
// reports ambiguity / missing-feature / invalid-operand diagnostics based on
// which of the four retries succeeded. Returns true on error.
2683 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2684 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2685 MCStreamer &Out, unsigned &ErrorInfo,
2686 bool MatchingInlineAsm) {
2687 assert(!Operands.empty() && "Unexpect empty operand list!");
2688 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2689 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2690 ArrayRef<SMRange> EmptyRanges = None;
2692 // First, handle aliases that expand to multiple instructions.
2693 // FIXME: This should be replaced with a real .td file alias mechanism.
2694 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Wait-prefixed x87 mnemonics: emit an explicit WAIT, then match the
// corresponding no-wait ("fn*") form.
2696 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2697 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2698 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2699 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2701 Inst.setOpcode(X86::WAIT);
2703 if (!MatchingInlineAsm)
2704 Out.EmitInstruction(Inst, STI);
2707 StringSwitch<const char*>(Op->getToken())
2708 .Case("finit", "fninit")
2709 .Case("fsave", "fnsave")
2710 .Case("fstcw", "fnstcw")
2711 .Case("fstcww", "fnstcw")
2712 .Case("fstenv", "fnstenv")
2713 .Case("fstsw", "fnstsw")
2714 .Case("fstsww", "fnstsw")
2715 .Case("fclex", "fnclex")
2717 assert(Repl && "Unknown wait-prefixed instruction");
2719 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2722 bool WasOriginallyInvalidOperand = false;
2725 // First, try a direct match.
2726 switch (MatchInstructionImpl(Operands, Inst,
2727 ErrorInfo, MatchingInlineAsm,
2728 isParsingIntelSyntax())) {
2731 // Some instructions need post-processing to, for example, tweak which
2732 // encoding is selected. Loop on it while changes happen so the
2733 // individual transformations can chain off each other.
2734 if (!MatchingInlineAsm)
2735 while (processInstruction(Inst, Operands))
2739 if (!MatchingInlineAsm)
2740 Out.EmitInstruction(Inst, STI)
2741 Opcode = Inst.getOpcode();
2743 case Match_MissingFeature: {
2744 assert(ErrorInfo && "Unknown missing feature!");
2745 // Special case the error message for the very common case where only
2746 // a single subtarget feature is missing.
2747 std::string Msg = "instruction requires:";
// Walk the ErrorInfo feature bitmask and name each missing feature.
2749 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2750 if (ErrorInfo & Mask) {
2752 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2756 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2758 case Match_InvalidOperand:
2759 WasOriginallyInvalidOperand = true;
2761 case Match_MnemonicFail:
2765 // FIXME: Ideally, we would only attempt suffix matches for things which are
2766 // valid prefixes, and we could just infer the right unambiguous
2767 // type. However, that requires substantially more matcher support than the
2770 // Change the operand to point to a temporary token.
2771 StringRef Base = Op->getToken();
2772 SmallString<16> Tmp;
2775 Op->setTokenValue(Tmp.str());
2777 // If this instruction starts with an 'f', then it is a floating point stack
2778 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2779 // 80-bit floating point, which use the suffixes s,l,t respectively.
2781 // Otherwise, we assume that this may be an integer instruction, which comes
2782 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2783 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2785 // Check for the various suffix matches.
2786 Tmp[Base.size()] = Suffixes[0];
2787 unsigned ErrorInfoIgnore;
2788 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2789 unsigned Match1, Match2, Match3, Match4;
2791 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2792 MatchingInlineAsm, isParsingIntelSyntax());
2793 // If this returned as a missing feature failure, remember that.
2794 if (Match1 == Match_MissingFeature)
2795 ErrorInfoMissingFeature = ErrorInfoIgnore;
2796 Tmp[Base.size()] = Suffixes[1];
2797 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2798 MatchingInlineAsm, isParsingIntelSyntax());
2799 // If this returned as a missing feature failure, remember that.
2800 if (Match2 == Match_MissingFeature)
2801 ErrorInfoMissingFeature = ErrorInfoIgnore;
2802 Tmp[Base.size()] = Suffixes[2];
2803 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2804 MatchingInlineAsm, isParsingIntelSyntax());
2805 // If this returned as a missing feature failure, remember that.
2806 if (Match3 == Match_MissingFeature)
2807 ErrorInfoMissingFeature = ErrorInfoIgnore;
2808 Tmp[Base.size()] = Suffixes[3];
2809 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2810 MatchingInlineAsm, isParsingIntelSyntax());
2811 // If this returned as a missing feature failure, remember that.
2812 if (Match4 == Match_MissingFeature)
2813 ErrorInfoMissingFeature = ErrorInfoIgnore;
2815 // Restore the old token.
2816 Op->setTokenValue(Base);
2818 // If exactly one matched, then we treat that as a successful match (and the
2819 // instruction will already have been filled in correctly, since the failing
2820 // matches won't have modified it).
2821 unsigned NumSuccessfulMatches =
2822 (Match1 == Match_Success) + (Match2 == Match_Success) +
2823 (Match3 == Match_Success) + (Match4 == Match_Success);
2824 if (NumSuccessfulMatches == 1) {
2826 if (!MatchingInlineAsm)
2827 Out.EmitInstruction(Inst, STI);
2828 Opcode = Inst.getOpcode();
2832 // Otherwise, the match failed, try to produce a decent error message.
2834 // If we had multiple suffix matches, then identify this as an ambiguous
2836 if (NumSuccessfulMatches > 1) {
2838 unsigned NumMatches = 0;
2839 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2840 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2841 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2842 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2844 SmallString<126> Msg;
2845 raw_svector_ostream OS(Msg);
2846 OS << "ambiguous instructions require an explicit suffix (could be ";
2847 for (unsigned i = 0; i != NumMatches; ++i) {
2850 if (i + 1 == NumMatches)
2852 OS << "'" << Base << MatchChars[i] << "'";
2855 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2859 // Okay, we know that none of the variants matched successfully.
2861 // If all of the instructions reported an invalid mnemonic, then the original
2862 // mnemonic was invalid.
2863 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2864 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2865 if (!WasOriginallyInvalidOperand) {
2866 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2868 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2869 Ranges, MatchingInlineAsm);
2872 // Recover location info for the operand if we know which was the problem.
2873 if (ErrorInfo != ~0U) {
2874 if (ErrorInfo >= Operands.size())
2875 return Error(IDLoc, "too few operands for instruction",
2876 EmptyRanges, MatchingInlineAsm);
2878 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2879 if (Operand->getStartLoc().isValid()) {
2880 SMRange OperandRange = Operand->getLocRange();
2881 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2882 OperandRange, MatchingInlineAsm);
2886 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2890 // If one instruction matched with a missing feature, report this as a
2892 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2893 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2894 std::string Msg = "instruction requires:";
// Same feature-bitmask walk as the direct-match MissingFeature case above.
2896 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2897 if (ErrorInfoMissingFeature & Mask) {
2899 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2903 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2906 // If one instruction matched with an invalid operand, report this as an
2908 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2909 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2910 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2915 // If all of these were an outright failure, report it in a useless way.
2916 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2917 EmptyRanges, MatchingInlineAsm);
// ParseDirective - Handle target-specific directives: .word, .code16/32/64,
// and the .att_syntax/.intel_syntax dialect switches. Returns true if the
// directive was not handled here.
2922 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2923 StringRef IDVal = DirectiveID.getIdentifier();
2924 if (IDVal == ".word")
2925 return ParseDirectiveWord(2, DirectiveID.getLoc());
2926 else if (IDVal.startswith(".code"))
2927 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2928 else if (IDVal.startswith(".att_syntax")) {
// Dialect 0 = AT&T, dialect 1 = Intel.
2929 getParser().setAssemblerDialect(0);
2931 } else if (IDVal.startswith(".intel_syntax")) {
2932 getParser().setAssemblerDialect(1);
2933 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2934 // FIXME: Handle noprefix
2935 if (Parser.getTok().getString() == "noprefix")
2943 /// ParseDirectiveWord
2944 /// ::= .word [ expression (, expression)* ]
// Emits each comma-separated expression as a Size-byte value; errors on any
// unexpected token between expressions.
2945 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2946 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2948 const MCExpr *Value;
2949 if (getParser().parseExpression(Value))
2952 getParser().getStreamer().EmitValue(Value, Size);
2954 if (getLexer().is(AsmToken::EndOfStatement))
2957 // FIXME: Improve diagnostic.
2958 if (getLexer().isNot(AsmToken::Comma)) {
2959 Error(L, "unexpected token in directive");
2970 /// ParseDirectiveCode
2971 /// ::= .code16 | .code32 | .code64
// Switches the parser's subtarget mode and notifies the streamer with the
// matching assembler flag; only switches when not already in that mode.
2972 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2973 if (IDVal == ".code16") {
2975 if (!is16BitMode()) {
2976 SwitchMode(X86::Mode16Bit);
2977 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2979 } else if (IDVal == ".code32") {
2981 if (!is32BitMode()) {
2982 SwitchMode(X86::Mode32Bit);
2983 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2985 } else if (IDVal == ".code64") {
2987 if (!is64BitMode()) {
2988 SwitchMode(X86::Mode64Bit);
2989 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2992 Error(L, "unknown directive " + IDVal);
2999 // Force static initialization.
// Registers this asm parser with the 32- and 64-bit x86 targets so the
// target registry can construct it on demand.
3000 extern "C" void LLVMInitializeX86AsmParser() {
3001 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
3002 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
3005 #define GET_REGISTER_MATCHER
3006 #define GET_MATCHER_IMPLEMENTATION
3007 #define GET_SUBTARGET_FEATURE_NAME
3008 #include "X86GenAsmMatcher.inc"