1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Relative precedence of each InfixCalculatorTok operator, indexed by token
// value; consulted by InfixCalculator::pushOperator below.
// NOTE(review): the initializer list is elided in this listing (numbering gap).
42 static const char OpPrecedence[] = {
// X86 target assembly parser: turns AT&T- or Intel-syntax assembly into
// MCInst instructions.  NOTE(review): gaps in the embedded line numbers mark
// elided source lines throughout this class.
57 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Record the current token's location; presumably the elided remainder
// Lex()es past the token and returns Result -- TODO confirm (body elided).
63 SMLoc consumeToken() {
64 MCAsmParser &Parser = getParser();
65 SMLoc Result = Parser.getTok().getLoc();
// Token kinds consumed by InfixCalculator (operands, operators, parens).
// Enumerator list elided in this listing.
70 enum InfixCalculatorTok {
// Shunting-yard style evaluator: infix operators are staged on
// InfixOperatorStack, operands/operators are emitted to PostfixStack, and
// execute() folds the postfix form into a single int64_t.
// NOTE(review): numbering gaps show elided lines; bodies below are fragments.
85 class InfixCalculator {
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88 SmallVector<ICToken, 4> PostfixStack;
// Pop the most recent operand (immediate or register) off the postfix stack.
// NOTE(review): assert text has typos ("Poped", "and immediate") -- cleanup
// candidate, but string literals are left untouched here.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
// Push an operand (IC_IMM with its value, or IC_REGISTER) onto the postfix
// stack.  Val defaults to 0 for registers.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
104 void popOperator() { InfixOperatorStack.pop_back(); }
// Stage an operator, first flushing higher-precedence operators from the
// infix stack to the postfix stack (classic shunting-yard step).
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
// Pop operators (tracking parenthesis nesting via ParenCount) until a
// lower-precedence operator or an unmatched '(' is found.
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
// execute(): drain remaining operators, then evaluate the postfix stream
// with an operand stack.  Parens never reach the postfix stack.
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
// Operator token: pop two operands, fold, push the immediate result.
// The switch-on-operator framing is elided in this listing; each
// arithmetic case below asserts both operands are immediates where the
// operation is meaningless on registers.
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression reduces to exactly one value.
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
// States of the Intel-expression parser below (IES_PLUS, IES_REGISTER,
// IES_ERROR, ...).  Enumerator list elided in this listing.
232 enum IntelExprState {
// State machine for Intel-syntax memory/immediate expressions such as
// [BaseReg + Scale*IndexReg + Disp].  Each onXXX() callback advances State,
// records base/index/scale, and feeds the InfixCalculator IC that computes
// the displacement.  NOTE(review): numbering gaps show elided lines; the
// state-transition switch bodies are mostly not visible here.
252 class IntelExprStateMachine {
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
// Start in IES_PLUS so a leading operand is accepted; Scale defaults to 1.
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
// Accessors for the parse results.
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
// Total displacement: the seed immediate plus the evaluated expression.
272 int64_t getImm() { return Imm + IC.execute(); }
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
// onOr / onAnd / onLShift / onRShift: binary-operator callbacks.  Each
// snapshots State, pushes the operator, and records PrevState (the guard
// switches are elided in this listing).
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
// onPlus: '+' also terminates a register term, promoting it to Base/Index.
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
// onMinus: '-' may be binary or unary depending on the preceding state.
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
// onNot (elided header): '~' handling, state bookkeeping only.
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
// onRegister: a register is either a plain operand or, after
// 'Integer *', the scaled index register.
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
// onIdentifierExpr: record a symbolic displacement; the symbol contributes
// 0 to the numeric fold (resolved later).
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
// onInteger: an integer is a scale (after 'Register *'), a unary -/~
// operand, or a plain immediate.  Returns true and sets ErrMsg on error.
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
// onStar / onDivide / onDotOperator (headers elided): push the matching
// operator.
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
// onRBrac (header elided): ']' also terminates a register term, like '+'.
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
// onLParen (header elided): reject '(' directly after unary -/~ (see FIXME).
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
// onRParen (header elided).
628 IC.pushOperator(IC_RPAREN);
// Report a parse error.  When matching MS-style inline asm
// (MatchingInlineAsm), diagnostics are suppressed: just return true (error).
634 bool Error(SMLoc L, const Twine &Msg,
635 ArrayRef<SMRange> Ranges = None,
636 bool MatchingInlineAsm = false) {
637 MCAsmParser &Parser = getParser();
638 if (MatchingInlineAsm) return true;
639 return Parser.Error(L, Msg, Ranges);
// Like Error(), but first discards the remainder of the current statement
// so parsing can resume at the next one.
642 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
643 ArrayRef<SMRange> Ranges = None,
644 bool MatchingInlineAsm = false) {
645 MCAsmParser &Parser = getParser();
646 Parser.eatToEndOfStatement();
647 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Emit an operand-level error and yield nullptr so callers can
// 'return ErrorOperand(...)' directly.  Body elided in this listing.
650 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// --- Operand-parsing entry points (definitions appear later in the file) ---
// DefaultMem{SI,DI}Operand build the implicit (%si)/(%di)-style operands for
// string instructions; ParseOperand dispatches on the active dialect.
655 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
656 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> ParseOperand();
658 std::unique_ptr<X86Operand> ParseATTOperand();
659 std::unique_ptr<X86Operand> ParseIntelOperand();
660 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
661 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
662 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
663 std::unique_ptr<X86Operand>
664 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
665 std::unique_ptr<X86Operand>
666 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
667 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
668 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
672 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
673 InlineAsmIdentifierInfo &Info,
674 bool IsUnevaluatedOperand, SMLoc &End);
676 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678 std::unique_ptr<X86Operand>
679 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
680 unsigned IndexReg, unsigned Scale, SMLoc Start,
681 SMLoc End, unsigned Size, StringRef Identifier,
682 InlineAsmIdentifierInfo &Info);
// Directive handlers (.word / .code16|32|64).
684 bool ParseDirectiveWord(unsigned Size, SMLoc L);
685 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
689 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
690 /// instrumentation around Inst.
691 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
693 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
694 OperandVector &Operands, MCStreamer &Out,
696 bool MatchingInlineAsm) override;
698 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
699 MCStreamer &Out, bool MatchingInlineAsm);
701 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
702 bool MatchingInlineAsm);
704 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
705 OperandVector &Operands, MCStreamer &Out,
707 bool MatchingInlineAsm);
709 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
710 OperandVector &Operands, MCStreamer &Out,
712 bool MatchingInlineAsm);
// Pointer size in bits for the current mode.  NOTE(review): identical body
// to getPointerWidth() further down -- deduplication candidate.
714 unsigned getPointerSize() {
715 if (is16BitMode()) return 16;
716 if (is32BitMode()) return 32;
717 if (is64BitMode()) return 64;
718 llvm_unreachable("invalid mode");
721 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
723 /// doSrcDstMatch - Returns true if operands are matching in their
724 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
725 /// the parsing mode (Intel vs. AT&T).
726 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
728 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
729 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
730 /// \return \c true if no parsing errors occurred, \c false otherwise.
731 bool HandleAVX512Operand(OperandVector &Operands,
732 const MCParsedAsmOperand &Op);
// Mode predicates test the subtarget feature bits directly.
734 bool is64BitMode() const {
735 // FIXME: Can tablegen auto-generate this?
736 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
738 bool is32BitMode() const {
739 // FIXME: Can tablegen auto-generate this?
740 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
742 bool is16BitMode() const {
743 // FIXME: Can tablegen auto-generate this?
744 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switch the parser to 'mode' by toggling the old and new mode bits in one
// ToggleFeature call, then recompute the available-feature mask.  The assert
// verifies exactly one mode bit survived.
746 void SwitchMode(uint64_t mode) {
747 uint64_t oldMode = STI.getFeatureBits() &
748 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
749 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
750 setAvailableFeatures(FB);
751 assert(mode == (STI.getFeatureBits() &
752 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Pointer width in bits (duplicate of getPointerSize() above).
755 unsigned getPointerWidth() {
756 if (is16BitMode()) return 16;
757 if (is32BitMode()) return 32;
758 if (is64BitMode()) return 64;
759 llvm_unreachable("invalid mode");
// Dialect 1 == Intel syntax (see MCAsmParser::getAssemblerDialect).
762 bool isParsingIntelSyntax() {
763 return getParser().getAssemblerDialect();
766 /// @name Auto-generated Matcher Functions
769 #define GET_ASSEMBLER_HEADER
770 #include "X86GenAsmMatcher.inc"
// Constructor: seed the available-feature mask from the subtarget and set up
// the (optional, sanitizer-driven) asm instrumentation.
775 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
776 const MCInstrInfo &mii, const MCTargetOptions &Options)
777 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
779 // Initialize the set of available features.
780 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
781 Instrumentation.reset(
782 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides (defined below / elsewhere in the file).
785 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
787 void SetFrameRegister(unsigned RegNo) override;
789 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
790 SMLoc NameLoc, OperandVector &Operands) override;
792 bool ParseDirective(AsmToken DirectiveID) override;
794 } // end anonymous namespace
796 /// @name Auto-generated Match Functions
// Supplied by X86GenAsmMatcher.inc; returns 0 when Name is not a register.
799 static unsigned MatchRegisterName(StringRef Name);
// Validate a base/index register pairing for a memory operand; on failure
// sets ErrMsg.  NOTE(review): numbering gaps show elided lines (the 'return
// true/false' statements and closing braces are not visible here).
803 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
805 // If we have both a base register and an index register make sure they are
806 // both 64-bit or 32-bit registers.
807 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
808 if (BaseReg != 0 && IndexReg != 0) {
809 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
810 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
811 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
812 IndexReg != X86::RIZ) {
813 ErrMsg = "base register is 64-bit, but index register is not";
816 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
817 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
818 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
819 IndexReg != X86::EIZ){
820 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit addressing only allows the classic BX/BP + SI/DI combinations.
823 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
824 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
825 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
826 ErrMsg = "base register is 16-bit, but index register is not";
829 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
830 IndexReg != X86::SI && IndexReg != X86::DI) ||
831 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
832 IndexReg != X86::BX && IndexReg != X86::BP)) {
833 ErrMsg = "invalid 16-bit base/index register combination";
// Check that two string-instruction memory operands use same-width base
// registers (%si/%di, %esi/%edi, %rsi/%rdi).  Non-memory operands return
// true so the generic matcher produces the diagnostic instead.
841 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
843 // Return true and let a normal complaint about bogus operands happen.
844 if (!Op1.isMem() || !Op2.isMem())
847 // Actually these might be the other way round if Intel syntax is
848 // being used. It doesn't matter.
849 unsigned diReg = Op1.Mem.BaseReg;
850 unsigned siReg = Op2.Mem.BaseReg;
852 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
853 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
854 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
855 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
856 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
857 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
858 // Again, return true and let another error happen.
// Parse a register token (with or without the AT&T '%' prefix) into RegNo,
// filling StartLoc/EndLoc.  Returns true on error; in Intel mode failures
// return true silently so the caller can retry the token as an identifier.
// NOTE(review): numbering gaps show elided lines throughout.
862 bool X86AsmParser::ParseRegister(unsigned &RegNo,
863 SMLoc &StartLoc, SMLoc &EndLoc) {
864 MCAsmParser &Parser = getParser();
866 const AsmToken &PercentTok = Parser.getTok();
867 StartLoc = PercentTok.getLoc();
869 // If we encounter a %, ignore it. This code handles registers with and
870 // without the prefix, unprefixed registers can occur in cfi directives.
871 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
872 Parser.Lex(); // Eat percent token.
874 const AsmToken &Tok = Parser.getTok();
875 EndLoc = Tok.getEndLoc();
877 if (Tok.isNot(AsmToken::Identifier)) {
878 if (isParsingIntelSyntax()) return true;
879 return Error(StartLoc, "invalid register name",
880 SMRange(StartLoc, EndLoc));
883 RegNo = MatchRegisterName(Tok.getString());
885 // If the match failed, try the register name as lowercase.
887 RegNo = MatchRegisterName(Tok.getString().lower());
// Reject 64-bit-only registers outside 64-bit mode.
889 if (!is64BitMode()) {
890 // FIXME: This should be done using Requires<Not64BitMode> and
891 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
893 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
895 if (RegNo == X86::RIZ ||
896 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
897 X86II::isX86_64NonExtLowByteReg(RegNo) ||
898 X86II::isX86_64ExtendedReg(RegNo))
899 return Error(StartLoc, "register %"
900 + Tok.getString() + " is only available in 64-bit mode",
901 SMRange(StartLoc, EndLoc));
904 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
905 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
907 Parser.Lex(); // Eat 'st'
909 // Check to see if we have '(4)' after %st.
910 if (getLexer().isNot(AsmToken::LParen))
915 const AsmToken &IntTok = Parser.getTok();
916 if (IntTok.isNot(AsmToken::Integer))
917 return Error(IntTok.getLoc(), "expected stack index");
918 switch (IntTok.getIntVal()) {
919 case 0: RegNo = X86::ST0; break;
920 case 1: RegNo = X86::ST1; break;
921 case 2: RegNo = X86::ST2; break;
922 case 3: RegNo = X86::ST3; break;
923 case 4: RegNo = X86::ST4; break;
924 case 5: RegNo = X86::ST5; break;
925 case 6: RegNo = X86::ST6; break;
926 case 7: RegNo = X86::ST7; break;
927 default: return Error(IntTok.getLoc(), "invalid stack index");
930 if (getParser().Lex().isNot(AsmToken::RParen))
931 return Error(Parser.getTok().getLoc(), "expected ')'");
933 EndLoc = Parser.getTok().getEndLoc();
934 Parser.Lex(); // Eat ')'
938 EndLoc = Parser.getTok().getEndLoc();
// Debug-register aliases: "db0".."db7" map to DR0..DR7.
940 // If this is "db[0-7]", match it as an alias
942 if (RegNo == 0 && Tok.getString().size() == 3 &&
943 Tok.getString().startswith("db")) {
944 switch (Tok.getString()[2]) {
945 case '0': RegNo = X86::DR0; break;
946 case '1': RegNo = X86::DR1; break;
947 case '2': RegNo = X86::DR2; break;
948 case '3': RegNo = X86::DR3; break;
949 case '4': RegNo = X86::DR4; break;
950 case '5': RegNo = X86::DR5; break;
951 case '6': RegNo = X86::DR6; break;
952 case '7': RegNo = X86::DR7; break;
956 EndLoc = Parser.getTok().getEndLoc();
957 Parser.Lex(); // Eat it.
// Still unmatched: error (AT&T) or defer to identifier parsing (Intel).
963 if (isParsingIntelSyntax()) return true;
964 return Error(StartLoc, "invalid register name",
965 SMRange(StartLoc, EndLoc));
968 Parser.Lex(); // Eat identifier token.
// Forward the frame register to the asm instrumentation layer.
972 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
973 Instrumentation->SetInitialFrameRegister(RegNo);
// Build the implicit source operand for string instructions: (%rsi)/(%esi)/
// (%si) depending on mode, with zero displacement.  (The 'unsigned basereg ='
// line is elided in this listing.)
976 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
978 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
979 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
980 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
981 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Implicit destination operand: (%rdi)/(%edi)/(%di), mirror of the above.
984 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
986 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
987 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
988 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
989 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Dispatch operand parsing on the active assembler dialect.
992 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
993 if (isParsingIntelSyntax())
994 return ParseIntelOperand();
995 return ParseATTOperand();
998 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword ("byte ptr", "qword ptr", ...) to a bit width;
// the .Default(0)/return lines are elided in this listing.
999 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1000 unsigned Size = StringSwitch<unsigned>(OpStr)
1001 .Cases("BYTE", "byte", 8)
1002 .Cases("WORD", "word", 16)
1003 .Cases("DWORD", "dword", 32)
1004 .Cases("QWORD", "qword", 64)
1005 .Cases("XWORD", "xword", 80)
1006 .Cases("XMMWORD", "xmmword", 128)
1007 .Cases("YMMWORD", "ymmword", 256)
1008 .Cases("ZMMWORD", "zmmword", 512)
1009 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Build a memory operand for MS inline asm, inferring the operand size from
// the identifier's declaration info and recording size-directive rewrites.
// NOTE(review): numbering gaps show elided lines.
1014 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1015 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1016 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1017 InlineAsmIdentifierInfo &Info) {
1018 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1019 // some other label reference.
1020 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1021 // Insert an explicit size if the user didn't have one.
1023 Size = getPointerWidth();
1024 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1028 // Create an absolute memory reference in order to match against
1029 // instructions taking a PC relative operand.
1030 return X86Operand::CreateMem(Disp, Start, End, Size, Identifier,
1034 // We either have a direct symbol reference, or an offset from a symbol. The
1035 // parser always puts the symbol on the LHS, so look there for size
1036 // calculation purposes.
1037 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1039 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1042 Size = Info.Type * 8; // Size is in terms of bits in this context.
1044 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1049 // When parsing inline assembly we set the base register to a non-zero value
1050 // if we don't know the actual value at this time. This is necessary to
1051 // get the matching correct in some cases.
1052 BaseReg = BaseReg ? BaseReg : 1;
1053 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1054 End, Size, Identifier, Info.OpDecl);
// Rewrite the inline-asm IR string for a bracketed Intel expression that
// contains a symbol: strip the brackets, fold the computed displacement back
// into a single immediate, and skip everything except the symbol itself.
// (Return type / 'static' on the preceding line is elided in this listing.)
1058 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1059 StringRef SymName, int64_t ImmDisp,
1060 int64_t FinalImmDisp, SMLoc &BracLoc,
1061 SMLoc &StartInBrac, SMLoc &End) {
1062 // Remove the '[' and ']' from the IR string.
1063 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1064 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1066 // If ImmDisp is non-zero, then we parsed a displacement before the
1067 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1068 // If ImmDisp doesn't match the displacement computed by the state machine
1069 // then we have an additional displacement in the bracketed expression.
1070 if (ImmDisp != FinalImmDisp) {
1072 // We have an immediate displacement before the bracketed expression.
1073 // Adjust this to match the final immediate displacement.
1075 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1076 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located before the '[' are candidates.
1077 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1079 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1080 assert (!Found && "ImmDisp already rewritten.");
1081 (*I).Kind = AOK_Imm;
1082 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1083 (*I).Val = FinalImmDisp;
1088 assert (Found && "Unable to rewrite ImmDisp.");
1091 // We have a symbolic and an immediate displacement, but no displacement
1092 // before the bracketed expression. Put the immediate displacement
1093 // before the bracketed expression.
1094 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1097 // Remove all the ImmPrefix rewrites within the brackets.
1098 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1099 E = AsmRewrites->end(); I != E; ++I) {
1100 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1102 if ((*I).Kind == AOK_ImmPrefix)
1103 (*I).Kind = AOK_Delete;
1105 const char *SymLocPtr = SymName.data();
1106 // Skip everything before the symbol.
1107 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1108 assert(Len > 0 && "Expected a non-negative length.");
1109 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1111 // Skip everything after the symbol.
1112 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1113 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1114 assert(Len > 0 && "Expected a non-negative length.");
1115 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Token loop driving the IntelExprStateMachine: feeds each lexer token to the
// matching SM.onXXX() callback until a valid end state or an error.  Returns
// true on error; End receives the location just past the expression.
// NOTE(review): numbering gaps show elided lines (loop header, several
// braces/returns are not visible here).
1119 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1120 MCAsmParser &Parser = getParser();
1121 const AsmToken &Tok = Parser.getTok();
1125 bool UpdateLocLex = true;
1127 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1128 // identifier. Don't try an parse it as a register.
1129 if (Tok.getString().startswith("."))
1132 // If we're parsing an immediate expression, we don't expect a '['.
1133 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1136 AsmToken::TokenKind TK = getLexer().getKind();
1139 if (SM.isValidEndState()) {
1143 return Error(Tok.getLoc(), "unknown token in expression");
1145 case AsmToken::EndOfStatement: {
1149 case AsmToken::String:
1150 case AsmToken::Identifier: {
1151 // This could be a register or a symbolic displacement.
1154 SMLoc IdentLoc = Tok.getLoc();
1155 StringRef Identifier = Tok.getString();
// Try the identifier as a register first (Intel mode: ParseRegister
// returns true without a diagnostic when it is not one).
1156 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1157 SM.onRegister(TmpReg);
1158 UpdateLocLex = false;
1161 if (!isParsingInlineAsm()) {
1162 if (getParser().parsePrimaryExpr(Val, End))
1163 return Error(Tok.getLoc(), "Unexpected identifier!");
1165 // This is a dot operator, not an adjacent identifier.
1166 if (Identifier.find('.') != StringRef::npos) {
1169 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1170 if (ParseIntelIdentifier(Val, Identifier, Info,
1171 /*Unevaluated=*/false, End))
1175 SM.onIdentifierExpr(Val, Identifier);
1176 UpdateLocLex = false;
1179 return Error(Tok.getLoc(), "Unexpected identifier!");
1181 case AsmToken::Integer: {
1183 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1184 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1186 // Look for 'b' or 'f' following an Integer as a directional label
1187 SMLoc Loc = getTok().getLoc();
1188 int64_t IntVal = getTok().getIntVal();
1189 End = consumeToken();
1190 UpdateLocLex = false;
1191 if (getLexer().getKind() == AsmToken::Identifier) {
1192 StringRef IDVal = getTok().getString();
1193 if (IDVal == "f" || IDVal == "b") {
// Directional label "Nb"/"Nf": resolve to the local symbol; backward
// references must already be defined.
1195 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1196 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1198 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1199 if (IDVal == "b" && Sym->isUndefined())
1200 return Error(Loc, "invalid reference to undefined symbol");
1201 StringRef Identifier = Sym->getName();
1202 SM.onIdentifierExpr(Val, Identifier);
1203 End = consumeToken();
1205 if (SM.onInteger(IntVal, ErrMsg))
1206 return Error(Loc, ErrMsg);
1209 if (SM.onInteger(IntVal, ErrMsg))
1210 return Error(Loc, ErrMsg);
// Operator tokens map one-to-one onto state-machine callbacks.
1214 case AsmToken::Plus: SM.onPlus(); break;
1215 case AsmToken::Minus: SM.onMinus(); break;
1216 case AsmToken::Tilde: SM.onNot(); break;
1217 case AsmToken::Star: SM.onStar(); break;
1218 case AsmToken::Slash: SM.onDivide(); break;
1219 case AsmToken::Pipe: SM.onOr(); break;
1220 case AsmToken::Amp: SM.onAnd(); break;
1221 case AsmToken::LessLess:
1222 SM.onLShift(); break;
1223 case AsmToken::GreaterGreater:
1224 SM.onRShift(); break;
1225 case AsmToken::LBrac: SM.onLBrac(); break;
1226 case AsmToken::RBrac: SM.onRBrac(); break;
1227 case AsmToken::LParen: SM.onLParen(); break;
1228 case AsmToken::RParen: SM.onRParen(); break;
1231 return Error(Tok.getLoc(), "unknown token in expression");
1233 if (!Done && UpdateLocLex)
1234 End = consumeToken();
// Parse a bracketed Intel memory expression '[ ... ]' (after any leading
// ImmDisp), producing an X86Operand.  NOTE(review): numbering gaps show
// elided lines (several returns/braces are not visible here).
1239 std::unique_ptr<X86Operand>
1240 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1241 int64_t ImmDisp, unsigned Size) {
1242 MCAsmParser &Parser = getParser();
1243 const AsmToken &Tok = Parser.getTok();
1244 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1245 if (getLexer().isNot(AsmToken::LBrac))
1246 return ErrorOperand(BracLoc, "Expected '[' token!");
1247 Parser.Lex(); // Eat '['
1249 SMLoc StartInBrac = Tok.getLoc();
1250 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1251 // may have already parsed an immediate displacement before the bracketed
1253 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1254 if (ParseIntelExpression(SM, End))
// Combine the symbolic and numeric displacement into Disp.
1257 const MCExpr *Disp = nullptr;
1258 if (const MCExpr *Sym = SM.getSym()) {
1259 // A symbolic displacement.
1261 if (isParsingInlineAsm())
1262 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1263 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1267 if (SM.getImm() || !Disp) {
1268 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1270 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1272 Disp = Imm; // An immediate displacement only.
1275 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1276 // will in fact do global lookup the field name inside all global typedefs,
1277 // but we don't emulate that.
1278 if (Tok.getString().find('.') != StringRef::npos) {
1279 const MCExpr *NewDisp;
1280 if (ParseIntelDotOperator(Disp, NewDisp))
1283 End = Tok.getEndLoc();
1284 Parser.Lex(); // Eat the field.
1288 int BaseReg = SM.getBaseReg();
1289 int IndexReg = SM.getIndexReg();
1290 int Scale = SM.getScale();
// Normal parsing: validate the base/index pair and build the operand.
1291 if (!isParsingInlineAsm()) {
1293 if (!BaseReg && !IndexReg) {
1295 return X86Operand::CreateMem(Disp, Start, End, Size);
1297 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1300 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1301 Error(StartInBrac, ErrMsg);
1304 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline-asm path: defer to CreateMemForInlineAsm for size/rewrite handling.
1308 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1309 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1310 End, Size, SM.getSymName(), Info);
1313 // Inline assembly may use variable names with namespace alias qualifiers.
/// Resolve an identifier inside MS inline assembly by asking the frontend
/// (SemaCallback).  On return Val holds a symbol reference for it, Identifier
/// the (possibly internal) name, Info the frontend's lookup result, and End
/// the source location just past the identifier.  IsUnevaluatedOperand is
/// forwarded to the frontend for operators like TYPE/SIZE that do not
/// evaluate their operand.
1314 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1315 StringRef &Identifier,
1316 InlineAsmIdentifierInfo &Info,
1317 bool IsUnevaluatedOperand, SMLoc &End) {
1318 MCAsmParser &Parser = getParser();
1319 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier and runs to the end of the statement;
// the frontend trims it to exactly the text it recognized.
1322 StringRef LineBuf(Identifier.data());
1324 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1326 const AsmToken &Tok = Parser.getTok();
1327 SMLoc Loc = Tok.getLoc();
1329 // Advance the token stream until the end of the current token is
1330 // after the end of what the frontend claimed.
// (e.g. "ns::var" lexes as several tokens but is one identifier to Sema.)
1331 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1333 End = Tok.getEndLoc();
1336 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1337 if (End.getPointer() == EndPtr) break;
1339 Identifier = LineBuf;
1341 // If the identifier lookup was unsuccessful, assume that we are dealing with
// a label (e.g. a jump target) and ask the frontend for its mangled name.
1344 StringRef InternalName =
1345 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1347 assert(InternalName.size() && "We should have an internal name here.");
1348 // Push a rewrite for replacing the identifier name with the internal name.
1349 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1354 // Create the symbol reference.
1355 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1356 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1357 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1361 /// \brief Parse intel style segment override.
/// Called after a segment register has been parsed; expects the ':' to be the
/// current token.  Handles "seg:imm", "seg:[...]" and "seg:symbol" forms.
1362 std::unique_ptr<X86Operand>
1363 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1365 MCAsmParser &Parser = getParser();
1366 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1367 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1368 if (Tok.isNot(AsmToken::Colon))
1369 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1370 Parser.Lex(); // Eat ':'
1372 int64_t ImmDisp = 0;
1373 if (getLexer().is(AsmToken::Integer)) {
1374 ImmDisp = Tok.getIntVal();
1375 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// Inline asm must see a '$$' prefix on literal immediates; record it.
1377 if (isParsingInlineAsm())
1378 InstInfo->AsmRewrites->push_back(
1379 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1381 if (getLexer().isNot(AsmToken::LBrac)) {
1382 // An immediate following a 'segment register', 'colon' token sequence can
1383 // be followed by a bracketed expression. If it isn't we know we have our
1384 // final segment override.
1385 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1386 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1387 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// "seg:[...]" — delegate to the bracket parser, passing any immediate
// displacement parsed above.
1392 if (getLexer().is(AsmToken::LBrac))
1393 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// "seg:symbol" — outside inline asm, just parse a primary expression.
1397 if (!isParsingInlineAsm()) {
1398 if (getParser().parsePrimaryExpr(Val, End))
1399 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1401 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline-asm "seg:identifier": resolve through the frontend.
1404 InlineAsmIdentifierInfo Info;
1405 StringRef Identifier = Tok.getString();
1406 if (ParseIntelIdentifier(Val, Identifier, Info,
1407 /*Unevaluated=*/false, End))
1409 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1410 /*Scale=*/1, Start, End, Size, Identifier, Info);
1413 /// ParseIntelMemOperand - Parse intel style memory operand.
/// Handles "[...]", "symbol" and inline-asm "Identifier [ ImmDisp ]" forms.
/// ImmDisp is a displacement already parsed before the operand (only legal
/// when a '[' follows); Size is the operand size in bits (0 if unknown).
1414 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1417 MCAsmParser &Parser = getParser();
1418 const AsmToken &Tok = Parser.getTok();
1421 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1422 if (getLexer().is(AsmToken::LBrac))
1423 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// From here on no bracket follows, so a pre-parsed displacement is a bug.
1424 assert(ImmDisp == 0);
// Plain symbol reference outside inline asm.
1427 if (!isParsingInlineAsm()) {
1428 if (getParser().parsePrimaryExpr(Val, End))
1429 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1431 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend first.
1434 InlineAsmIdentifierInfo Info;
1435 StringRef Identifier = Tok.getString();
1436 if (ParseIntelIdentifier(Val, Identifier, Info,
1437 /*Unevaluated=*/false, End))
1440 if (!getLexer().is(AsmToken::LBrac))
1441 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1442 /*Scale=*/1, Start, End, Size, Identifier, Info);
1444 Parser.Lex(); // Eat '['
1446 // Parse Identifier [ ImmDisp ]
1447 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1448 /*AddImmPrefix=*/false);
1449 if (ParseIntelExpression(SM, End))
// Only a constant subscript is allowed after a variable reference: no
// second symbol, no base register, no index register.
1453 Error(Start, "cannot use more than one symbol in memory operand");
1456 if (SM.getBaseReg()) {
1457 Error(Start, "cannot use base register with variable reference");
1460 if (SM.getIndexReg()) {
1461 Error(Start, "cannot use index register with variable reference");
1465 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1466 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1467 // we're pointing to a local variable in memory, so the base register is
1468 // really the frame or stack pointer.
1469 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1470 /*Scale=*/1, Start, End, Size, Identifier,
1474 /// Parse the '.' operator.
/// Computes a struct-field displacement: Disp must be a constant expression,
/// and NewDisp is set to Disp plus the field offset parsed after the '.'.
/// Returns true (after Error()) on failure.
1475 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1476 const MCExpr *&NewDisp) {
1477 MCAsmParser &Parser = getParser();
1478 const AsmToken &Tok = Parser.getTok();
1479 int64_t OrigDispVal, DotDispVal;
1481 // FIXME: Handle non-constant expressions.
1482 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1483 OrigDispVal = OrigDisp->getValue();
1485 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1487 // Drop the optional '.'.
1488 StringRef DotDispStr = Tok.getString();
1489 if (DotDispStr.startswith("."))
1490 DotDispStr = DotDispStr.drop_front(1);
1492 // .Imm gets lexed as a real.
// e.g. ".4" arrives as a Real token; re-parse its text as a decimal int.
1493 if (Tok.is(AsmToken::Real)) {
1495 DotDispStr.getAsInteger(10, DotDisp);
1496 DotDispVal = DotDisp.getZExtValue();
1497 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// "base.member": ask the frontend for the member's byte offset.
1499 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1500 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1502 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1503 DotDispVal = DotDisp;
1505 return Error(Tok.getLoc(), "Unexpected token type!");
// In inline asm, rewrite "base.member" to the combined numeric offset.
1507 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1508 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1509 unsigned Len = DotDispStr.size();
1510 unsigned Val = OrigDispVal + DotDispVal;
1511 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1515 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1519 /// Parse the 'offset' operator. This operator is used to specify the
1520 /// location rather then the content of a variable.
/// Inline-asm only: "offset var" becomes a register operand (the address is
/// materialized into a GPR by the compiler) and the "offset " text is
/// rewritten away.
1521 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1522 MCAsmParser &Parser = getParser();
1523 const AsmToken &Tok = Parser.getTok();
1524 SMLoc OffsetOfLoc = Tok.getLoc();
1525 Parser.Lex(); // Eat offset.
1528 InlineAsmIdentifierInfo Info;
1529 SMLoc Start = Tok.getLoc(), End;
1530 StringRef Identifier = Tok.getString();
1531 if (ParseIntelIdentifier(Val, Identifier, Info,
1532 /*Unevaluated=*/false, End))
1535 // Don't emit the offset operator.
// 7 == strlen("offset "), the span skipped in the rewritten asm string.
1536 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1538 // The offset operator will have an 'r' constraint, thus we need to create
1539 // register operand to ensure proper matching. Just pick a GPR based on
1540 // the size of a pointer.
1542 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1543 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1544 OffsetOfLoc, Identifier, Info.OpDecl);
1547 enum IntelOperatorKind {
1553 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1554 /// returns the number of elements in an array. It returns the value 1 for
1555 /// non-array variables. The SIZE operator returns the size of a C or C++
1556 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1557 /// TYPE operator returns the size of a C or C++ type or variable. If the
1558 /// variable is an array, TYPE returns the size of a single element.
/// OpKind selects which operator (IOK_LENGTH/IOK_SIZE/IOK_TYPE); the result
/// is folded to an immediate operand and the source text rewritten to it.
1559 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1560 MCAsmParser &Parser = getParser();
1561 const AsmToken &Tok = Parser.getTok();
1562 SMLoc TypeLoc = Tok.getLoc();
1563 Parser.Lex(); // Eat operator.
1565 const MCExpr *Val = nullptr;
1566 InlineAsmIdentifierInfo Info;
1567 SMLoc Start = Tok.getLoc(), End;
1568 StringRef Identifier = Tok.getString();
// Unevaluated: the operand of TYPE/SIZE/LENGTH is never executed.
1569 if (ParseIntelIdentifier(Val, Identifier, Info,
1570 /*Unevaluated=*/true, End))
1574 return ErrorOperand(Start, "unable to lookup expression");
// Pick the queried property from the frontend's identifier info.
1578 default: llvm_unreachable("Unexpected operand kind!");
1579 case IOK_LENGTH: CVal = Info.Length; break;
1580 case IOK_SIZE: CVal = Info.Size; break;
1581 case IOK_TYPE: CVal = Info.Type; break;
1584 // Rewrite the type operator and the C or C++ type or variable in terms of an
1585 // immediate. E.g. TYPE foo -> $$4
1586 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1587 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1589 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1590 return X86Operand::CreateImm(Imm, Start, End);
/// Top-level Intel-syntax operand parser.  Dispatches between the special
/// inline-asm operators (offset/length/size/type), immediate expressions,
/// register / segment-override forms, and memory operands.
1593 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1594 MCAsmParser &Parser = getParser();
1595 const AsmToken &Tok = Parser.getTok();
1598 // Offset, length, type and size operators.
// Only recognized in MS inline asm; both lower- and upper-case spellings.
1599 if (isParsingInlineAsm()) {
1600 StringRef AsmTokStr = Tok.getString();
1601 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1602 return ParseIntelOffsetOfOperator();
1603 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1604 return ParseIntelOperator(IOK_LENGTH);
1605 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1606 return ParseIntelOperator(IOK_SIZE);
1607 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1608 return ParseIntelOperator(IOK_TYPE);
// Optional "<size> ptr" prefix, e.g. "dword ptr"; Size is in bits.
1611 unsigned Size = getIntelMemOperandSize(Tok.getString());
1613 Parser.Lex(); // Eat operand size (e.g., byte, word).
1614 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1615 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1616 Parser.Lex(); // Eat ptr.
1618 Start = Tok.getLoc();
// Immediate or displacement-first forms: an integer, unary -/~, or '('.
1621 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1622 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1623 AsmToken StartTok = Tok;
1624 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1625 /*AddImmPrefix=*/false);
1626 if (ParseIntelExpression(SM, End))
1629 int64_t Imm = SM.getImm();
1630 if (isParsingInlineAsm()) {
1631 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1632 if (StartTok.getString().size() == Len)
1633 // Just add a prefix if this wasn't a complex immediate expression.
1634 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1636 // Otherwise, rewrite the complex expression as a single immediate.
1637 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1640 if (getLexer().isNot(AsmToken::LBrac)) {
1641 // If a directional label (ie. 1f or 2b) was parsed above from
1642 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1643 // to the MCExpr with the directional local symbol and this is a
1644 // memory operand not an immediate operand.
1646 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1648 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1649 return X86Operand::CreateImm(ImmExpr, Start, End);
1652 // Only positive immediates are valid.
// A negative value before '[' cannot be a displacement prefix.
1654 return ErrorOperand(Start, "expected a positive immediate displacement "
1655 "before bracketed expr.");
1657 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1658 return ParseIntelMemOperand(Imm, Start, Size);
// Register forms: plain register, or segment register starting an override.
1663 if (!ParseRegister(RegNo, Start, End)) {
1664 // If this is a segment register followed by a ':', then this is the start
1665 // of a segment override, otherwise this is a normal register reference.
1666 if (getLexer().isNot(AsmToken::Colon))
1667 return X86Operand::CreateReg(RegNo, Start, End);
1669 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is a memory operand with no pre-parsed displacement.
1673 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Top-level AT&T-syntax operand parser: '%reg', '$imm', or a memory
/// operand (anything else).  Returns null after Error() on failure.
1676 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1677 MCAsmParser &Parser = getParser();
1678 switch (getLexer().getKind()) {
1680 // Parse a memory operand with no segment register.
1681 return ParseMemOperand(0, Parser.getTok().getLoc());
1682 case AsmToken::Percent: {
1683 // Read the register.
1686 if (ParseRegister(RegNo, Start, End)) return nullptr;
// %eiz/%riz are pseudo index registers; reject them in register position.
1687 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1688 Error(Start, "%eiz and %riz can only be used as index registers",
1689 SMRange(Start, End));
1693 // If this is a segment register followed by a ':', then this is the start
1694 // of a memory reference, otherwise this is a normal register reference.
1695 if (getLexer().isNot(AsmToken::Colon))
1696 return X86Operand::CreateReg(RegNo, Start, End);
1698 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1699 return ErrorOperand(Start, "invalid segment register");
1701 getParser().Lex(); // Eat the colon.
1702 return ParseMemOperand(RegNo, Start);
1704 case AsmToken::Dollar: {
1705 // $42 -> immediate.
1706 SMLoc Start = Parser.getTok().getLoc(), End;
1709 if (getParser().parseExpression(Val, End))
1711 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand:
/// memory broadcast "{1toN}", a mask register "{%kN}", and zeroing "{z}".
/// Recognized pieces are appended to Operands as tokens/operands.
/// Returns true on success or when nothing AVX-512-specific follows.
1716 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1717 const MCParsedAsmOperand &Op) {
1718 MCAsmParser &Parser = getParser();
// Only attempt this when the subtarget actually has AVX-512.
1719 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1720 if (getLexer().is(AsmToken::LCurly)) {
1721 // Eat "{" and mark the current place.
1722 const SMLoc consumedToken = consumeToken();
1723 // Distinguish {1to<NUM>} from {%k<NUM>}.
1724 if(getLexer().is(AsmToken::Integer)) {
1725 // Parse memory broadcasting ({1to<NUM>}).
1726 if (getLexer().getTok().getIntVal() != 1)
1727 return !ErrorAndEatStatement(getLexer().getLoc(),
1728 "Expected 1to<NUM> at this point");
1729 Parser.Lex(); // Eat "1" of 1to8
1730 if (!getLexer().is(AsmToken::Identifier) ||
1731 !getLexer().getTok().getIdentifier().startswith("to"))
1732 return !ErrorAndEatStatement(getLexer().getLoc(),
1733 "Expected 1to<NUM> at this point");
1734 // Recognize only reasonable suffixes.
// Map "toN" to the canonical token text the matcher expects.
1735 const char *BroadcastPrimitive =
1736 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1737 .Case("to2", "{1to2}")
1738 .Case("to4", "{1to4}")
1739 .Case("to8", "{1to8}")
1740 .Case("to16", "{1to16}")
1742 if (!BroadcastPrimitive)
1743 return !ErrorAndEatStatement(getLexer().getLoc(),
1744 "Invalid memory broadcast primitive.")
1745 Parser.Lex(); // Eat "toN" of 1toN
1746 if (!getLexer().is(AsmToken::RCurly))
1747 return !ErrorAndEatStatement(getLexer().getLoc(),
1748 "Expected } at this point");
1749 Parser.Lex(); // Eat "}"
1750 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1752 // No AVX512 specific primitives can pass
1753 // after memory broadcasting, so return.
1756 // Parse mask register {%k1}
1757 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1758 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1759 Operands.push_back(std::move(Op));
1760 if (!getLexer().is(AsmToken::RCurly))
1761 return !ErrorAndEatStatement(getLexer().getLoc(),
1762 "Expected } at this point");
1763 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1765 // Parse "zeroing non-masked" semantic {z}
1766 if (getLexer().is(AsmToken::LCurly)) {
1767 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1768 if (!getLexer().is(AsmToken::Identifier) ||
1769 getLexer().getTok().getIdentifier() != "z")
1770 return !ErrorAndEatStatement(getLexer().getLoc(),
1771 "Expected z at this point");
1772 Parser.Lex(); // Eat the z
1773 if (!getLexer().is(AsmToken::RCurly))
1774 return !ErrorAndEatStatement(getLexer().getLoc(),
1775 "Expected } at this point");
1776 Parser.Lex(); // Eat the }
1785 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1786 /// has already been parsed if present.
/// SegReg is the parsed segment register (0 if none), MemStart the operand's
/// start location.  Also accepts plain expressions (treated as absolute
/// memory references) and validates 16-bit addressing restrictions.
1787 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1790 MCAsmParser &Parser = getParser();
1791 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1792 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1793 // only way to do this without lookahead is to eat the '(' and see what is
// after it.  Disp defaults to 0 for the missing-displacement case.
1795 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1796 if (getLexer().isNot(AsmToken::LParen)) {
// Displacement came first: parse it, then check for a following '('.
1798 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1800 // After parsing the base expression we could either have a parenthesized
1801 // memory address or not. If not, return now. If so, eat the (.
1802 if (getLexer().isNot(AsmToken::LParen)) {
1803 // Unless we have a segment register, treat this as an immediate.
1805 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1806 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1812 // Okay, we have a '('. We don't know if this is an expression or not, but
1813 // so we have to eat the ( to see beyond it.
1814 SMLoc LParenLoc = Parser.getTok().getLoc();
1815 Parser.Lex(); // Eat the '('.
1817 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1818 // Nothing to do here, fall into the code below with the '(' part of the
1819 // memory operand consumed.
1823 // It must be an parenthesized expression, parse it now.
1824 if (getParser().parseParenExpression(Disp, ExprEnd))
1827 // After parsing the base expression we could either have a parenthesized
1828 // memory address or not. If not, return now. If so, eat the (.
1829 if (getLexer().isNot(AsmToken::LParen)) {
1830 // Unless we have a segment register, treat this as an immediate.
1832 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1833 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1841 // If we reached here, then we just ate the ( of the memory operand. Process
1842 // the rest of the memory operand.
1843 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1844 SMLoc IndexLoc, BaseLoc;
1846 if (getLexer().is(AsmToken::Percent)) {
1847 SMLoc StartLoc, EndLoc;
1848 BaseLoc = Parser.getTok().getLoc();
1849 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
// eiz/riz are index-only pseudo registers; not valid as a base.
1850 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1851 Error(StartLoc, "eiz and riz can only be used as index registers",
1852 SMRange(StartLoc, EndLoc));
1857 if (getLexer().is(AsmToken::Comma)) {
1858 Parser.Lex(); // Eat the comma.
1859 IndexLoc = Parser.getTok().getLoc();
1861 // Following the comma we should have either an index register, or a scale
1862 // value. We don't support the later form, but we want to parse it
// anyway so we can diagnose it nicely.
1865 // Not that even though it would be completely consistent to support syntax
1866 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1867 if (getLexer().is(AsmToken::Percent)) {
1869 if (ParseRegister(IndexReg, L, L)) return nullptr;
1871 if (getLexer().isNot(AsmToken::RParen)) {
1872 // Parse the scale amount:
1873 // ::= ',' [scale-expression]
1874 if (getLexer().isNot(AsmToken::Comma)) {
1875 Error(Parser.getTok().getLoc(),
1876 "expected comma in scale expression");
1879 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: "(%eax,%ebx,)" keeps Scale=1.
1881 if (getLexer().isNot(AsmToken::RParen)) {
1882 SMLoc Loc = Parser.getTok().getLoc();
1885 if (getParser().parseAbsoluteExpression(ScaleVal)){
1886 Error(Loc, "expected scale expression");
1890 // Validate the scale amount.
1891 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1893 Error(Loc, "scale factor in 16-bit address must be 1");
1896 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1897 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1900 Scale = (unsigned)ScaleVal;
1903 } else if (getLexer().isNot(AsmToken::RParen)) {
1904 // A scale amount without an index is ignored.
1906 SMLoc Loc = Parser.getTok().getLoc();
1909 if (getParser().parseAbsoluteExpression(Value))
1913 Warning(Loc, "scale factor without index register is ignored");
1918 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1919 if (getLexer().isNot(AsmToken::RParen)) {
1920 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1923 SMLoc MemEnd = Parser.getTok().getEndLoc();
1924 Parser.Lex(); // Eat the ')'.
1926 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1927 // and then only in non-64-bit modes. Except for DX, which is a special case
1928 // because an unofficial form of in/out instructions uses it.
1929 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1930 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1931 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1932 BaseReg != X86::DX) {
1933 Error(BaseLoc, "invalid 16-bit base register");
// 16-bit addressing has no index-only form.
1937 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1938 Error(IndexLoc, "16-bit memory operand may not include only index register");
1943 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1944 Error(BaseLoc, ErrMsg);
// Emit the simplest operand form that captures what was parsed.
1948 if (SegReg || BaseReg || IndexReg)
1949 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1951 return X86Operand::CreateMem(Disp, MemStart, MemEnd);
/// Parse one X86 instruction: patch pseudo-mnemonics (setCCb, cmpCC{ss,sd,
/// ps,pd}), parse all operands, then apply a series of mnemonic-specific
/// fix-ups (in/out %dx forms, default string-op operands, shift-by-1, int3).
/// Returns true (after Error) on failure.
1954 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1955 SMLoc NameLoc, OperandVector &Operands) {
1956 MCAsmParser &Parser = getParser();
1958 StringRef PatchedName = Name;
1960 // FIXME: Hack to recognize setneb as setne.
1961 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1962 PatchedName != "setb" && PatchedName != "setnb")
1963 PatchedName = PatchedName.substr(0, Name.size()-1);
1965 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1966 const MCExpr *ExtraImmOp = nullptr;
1967 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1968 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1969 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1970 bool IsVCMP = PatchedName[0] == 'v';
1971 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the textual condition code to its immediate encoding; codes >= 0x08
// are AVX-only.
1972 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1973 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1977 .Case("unord", 0x03)
1982 /* AVX only from here */
1983 .Case("eq_uq", 0x08)
1986 .Case("false", 0x0B)
1987 .Case("neq_oq", 0x0C)
1991 .Case("eq_os", 0x10)
1992 .Case("lt_oq", 0x11)
1993 .Case("le_oq", 0x12)
1994 .Case("unord_s", 0x13)
1995 .Case("neq_us", 0x14)
1996 .Case("nlt_uq", 0x15)
1997 .Case("nle_uq", 0x16)
1998 .Case("ord_s", 0x17)
1999 .Case("eq_us", 0x18)
2000 .Case("nge_uq", 0x19)
2001 .Case("ngt_uq", 0x1A)
2002 .Case("false_os", 0x1B)
2003 .Case("neq_os", 0x1C)
2004 .Case("ge_oq", 0x1D)
2005 .Case("gt_oq", 0x1E)
2006 .Case("true_us", 0x1F)
2008 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
// Turn "cmpltss" into "cmpss" + an extra immediate operand (the CC).
2009 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2010 getParser().getContext());
2011 if (PatchedName.endswith("ss")) {
2012 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2013 } else if (PatchedName.endswith("sd")) {
2014 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2015 } else if (PatchedName.endswith("ps")) {
2016 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2018 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2019 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
// The mnemonic itself is always the first "operand".
2024 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the extra CC immediate comes first; in Intel syntax it is
// appended after the other operands (see below).
2026 if (ExtraImmOp && !isParsingIntelSyntax())
2027 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2029 // Determine whether this is an instruction prefix.
2031 Name == "lock" || Name == "rep" ||
2032 Name == "repe" || Name == "repz" ||
2033 Name == "repne" || Name == "repnz" ||
2034 Name == "rex64" || Name == "data16";
2037 // This does the actual operand parsing. Don't parse any more if we have a
2038 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2039 // just want to parse the "lock" as the first instruction and the "incl" as
// the next statement.
2041 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2043 // Parse '*' modifier.
2044 if (getLexer().is(AsmToken::Star))
2045 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2047 // Read the operands.
2049 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2050 Operands.push_back(std::move(Op));
// AVX-512 decorations ({1toN}/{k}/{z}) may trail each operand.
2051 if (!HandleAVX512Operand(Operands, *Operands.back()))
2054 Parser.eatToEndOfStatement();
2057 // check for comma and eat it
2058 if (getLexer().is(AsmToken::Comma))
2064 if (getLexer().isNot(AsmToken::EndOfStatement))
2065 return ErrorAndEatStatement(getLexer().getLoc(),
2066 "unexpected token in argument list");
2069 // Consume the EndOfStatement or the prefix separator Slash
2070 if (getLexer().is(AsmToken::EndOfStatement) ||
2071 (isPrefix && getLexer().is(AsmToken::Slash)))
2074 if (ExtraImmOp && isParsingIntelSyntax())
2075 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2077 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2078 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2079 // documented form in various unofficial manuals, so a lot of code uses it.
2080 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2081 Operands.size() == 3) {
2082 X86Operand &Op = (X86Operand &)*Operands.back();
2083 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2084 isa<MCConstantExpr>(Op.Mem.Disp) &&
2085 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2086 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2087 SMLoc Loc = Op.getEndLoc();
// Replace the "(%dx)" memory operand with the %dx register operand.
2088 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2091 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2092 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2093 Operands.size() == 3) {
2094 X86Operand &Op = (X86Operand &)*Operands[1];
2095 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2096 isa<MCConstantExpr>(Op.Mem.Disp) &&
2097 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2098 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2099 SMLoc Loc = Op.getEndLoc();
2100 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2104 // Append default arguments to "ins[bwld]"
// Note the operand order differs between Intel and AT&T syntax.
2105 if (Name.startswith("ins") && Operands.size() == 1 &&
2106 (Name == "insb" || Name == "insw" || Name == "insl" ||
2108 if (isParsingIntelSyntax()) {
2109 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2110 Operands.push_back(DefaultMemDIOperand(NameLoc));
2112 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2113 Operands.push_back(DefaultMemDIOperand(NameLoc));
2117 // Append default arguments to "outs[bwld]"
2118 if (Name.startswith("outs") && Operands.size() == 1 &&
2119 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2120 Name == "outsd" )) {
2121 if (isParsingIntelSyntax()) {
2122 Operands.push_back(DefaultMemSIOperand(NameLoc));
2123 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2125 Operands.push_back(DefaultMemSIOperand(NameLoc));
2126 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2130 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2131 // values of $SIREG according to the mode. It would be nice if this
2132 // could be achieved with InstAlias in the tables.
2133 if (Name.startswith("lods") && Operands.size() == 1 &&
2134 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2135 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2136 Operands.push_back(DefaultMemSIOperand(NameLoc));
2138 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2139 // values of $DIREG according to the mode. It would be nice if this
2140 // could be achieved with InstAlias in the tables.
2141 if (Name.startswith("stos") && Operands.size() == 1 &&
2142 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2143 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2144 Operands.push_back(DefaultMemDIOperand(NameLoc));
2146 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2147 // values of $DIREG according to the mode. It would be nice if this
2148 // could be achieved with InstAlias in the tables.
2149 if (Name.startswith("scas") && Operands.size() == 1 &&
2150 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2151 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2152 Operands.push_back(DefaultMemDIOperand(NameLoc));
2154 // Add default SI and DI operands to "cmps[bwlq]".
2155 if (Name.startswith("cmps") &&
2156 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2157 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2158 if (Operands.size() == 1) {
2159 if (isParsingIntelSyntax()) {
2160 Operands.push_back(DefaultMemSIOperand(NameLoc));
2161 Operands.push_back(DefaultMemDIOperand(NameLoc));
2163 Operands.push_back(DefaultMemDIOperand(NameLoc));
2164 Operands.push_back(DefaultMemSIOperand(NameLoc));
2166 } else if (Operands.size() == 3) {
// Explicit operands: verify the two sides use matching index registers.
2167 X86Operand &Op = (X86Operand &)*Operands[1];
2168 X86Operand &Op2 = (X86Operand &)*Operands[2];
2169 if (!doSrcDstMatch(Op, Op2))
2170 return Error(Op.getStartLoc(),
2171 "mismatching source and destination index registers");
2175 // Add default SI and DI operands to "movs[bwlq]".
2176 if ((Name.startswith("movs") &&
2177 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2178 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2179 (Name.startswith("smov") &&
2180 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2181 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2182 if (Operands.size() == 1) {
// Bare "movsd" is the string op, not the SSE instruction; canonicalize.
2183 if (Name == "movsd")
2184 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2185 if (isParsingIntelSyntax()) {
2186 Operands.push_back(DefaultMemDIOperand(NameLoc));
2187 Operands.push_back(DefaultMemSIOperand(NameLoc));
2189 Operands.push_back(DefaultMemSIOperand(NameLoc));
2190 Operands.push_back(DefaultMemDIOperand(NameLoc));
2192 } else if (Operands.size() == 3) {
2193 X86Operand &Op = (X86Operand &)*Operands[1];
2194 X86Operand &Op2 = (X86Operand &)*Operands[2];
2195 if (!doSrcDstMatch(Op, Op2))
2196 return Error(Op.getStartLoc(),
2197 "mismatching source and destination index registers");
2201 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// the implicit-$1 form by dropping the literal 1 operand.
2203 if ((Name.startswith("shr") || Name.startswith("sar") ||
2204 Name.startswith("shl") || Name.startswith("sal") ||
2205 Name.startswith("rcl") || Name.startswith("rcr") ||
2206 Name.startswith("rol") || Name.startswith("ror")) &&
2207 Operands.size() == 3) {
2208 if (isParsingIntelSyntax()) {
// Intel: the count is the last operand; AT&T: it is the first operand.
2210 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2211 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2212 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2213 Operands.pop_back();
2215 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2216 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2217 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2218 Operands.erase(Operands.begin() + 1);
2222 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2223 // instalias with an immediate operand yet.
2224 if (Name == "int" && Operands.size() == 2) {
2225 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2226 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2227 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2228 Operands.erase(Operands.begin() + 1);
2229 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
// Shared tail of the convert{16,32,64}* helpers below: rebuild Inst as the
// register form <Opcode> against the fixed accumulator register Reg, carrying
// over the original immediate (operand 0 of the matched accumulator form).
// NOTE(review): the remainder of this helper (isCmp handling and the final
// assignment back into Inst) is elided in this chunk — confirm there.
static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
  TmpInst.setOpcode(Opcode);
  TmpInst.addOperand(MCOperand::CreateReg(Reg));
  TmpInst.addOperand(MCOperand::CreateReg(Reg));
  // The accumulator-immediate form's only explicit operand is the immediate;
  // forward it unchanged.
  TmpInst.addOperand(Inst.getOperand(0));
// Shrink a 16-bit "<op> %ax, imm16" encoding to its sign-extended imm8
// register form (<op>16ri8) when the immediate fits in a signed 8-bit field.
// isCmp marks compare-style opcodes (passed through to convertToSExti8).
static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  // Only applicable when operand 0 is an immediate representable as a
  // sign-extended i8 in an i16 context.
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti16i8Value(Inst.getOperand(0).getImm()))

  return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Shrink a 32-bit "<op> %eax, imm32" encoding to its sign-extended imm8
// register form (<op>32ri8) when the immediate fits in a signed 8-bit field.
// isCmp marks compare-style opcodes (passed through to convertToSExti8).
static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  // Only applicable when operand 0 is an immediate representable as a
  // sign-extended i8 in an i32 context.
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti32i8Value(Inst.getOperand(0).getImm()))

  return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Shrink a 64-bit "<op> %rax, imm32" encoding to its sign-extended imm8
// register form (<op>64ri8) when the immediate fits in a signed 8-bit field.
// isCmp marks compare-style opcodes (passed through to convertToSExti8).
static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  // Only applicable when operand 0 is an immediate representable as a
  // sign-extended i8 in an i64 context.
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti64i8Value(Inst.getOperand(0).getImm()))

  return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-process a successfully matched instruction to select a better
// encoding.  The default case returns false ("no change"), which terminates
// the caller's transformation loop; the non-default cases rewrite Inst.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  switch (Inst.getOpcode()) {
  default: return false;
  // Accumulator-with-immediate ALU forms: try to shrink to the sign-extended
  // imm8 register encodings via the convert* helpers above.
  case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
  case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
  case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
  case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
  case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
  case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
  case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
  case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
  case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
  case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
  case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
  case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
  case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
  case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
  case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
  case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
  case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
  case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
  case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
  case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
  case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
  case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
  case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
  case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
  // VEX register-register moves: when the source (operand 1) is an x86-64
  // extended register and the destination (operand 0) is not, switch to the
  // operand-reversed (_REV) opcode.  NOTE(review): presumably this yields a
  // shorter VEX prefix encoding — confirm against X86InstrInfo.td.
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    // Keep the original encoding unless reversing would help.
    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
        !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))

    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
    case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
    case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
    case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
    case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
    case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
    case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
    case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
    case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
    case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
    case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
    case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
    Inst.setOpcode(NewOpc);
  // Same reversal for the scalar moves; these are three-operand forms, so the
  // source register to test is operand 2.  NOTE(review): the inner switch also
  // handles X86::VMOVSDrr, which presumably shares this case via an elided
  // "case X86::VMOVSDrr:" label — confirm upstream.
  case X86::VMOVSSrr: {
    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
        !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))

    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
    Inst.setOpcode(NewOpc);
2354 static const char *getSubtargetFeatureName(uint64_t Val);
// Single funnel for instruction emission: route through the instrumentation
// object so it can transform the instruction before it reaches the streamer
// (see X86AsmInstrumentation for what transformations may apply).
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
  Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Top-level match-and-emit entry point required by MCTargetAsmParser:
// dispatch to the Intel- or AT&T-syntax matcher based on the active dialect.
// Returns true on error (diagnostic already reported).
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           OperandVector &Operands,
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  if (isParsingIntelSyntax())
    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
// Expand FPU mnemonics that imply a wait prefix (e.g. "finit", "fstsw") into
// an explicit WAIT instruction followed by the corresponding no-wait mnemonic
// ("fninit", "fnstsw", ...), rewriting Operands[0] in place so the normal
// matcher then matches the no-wait form.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
                                     OperandVector &Operands, MCStreamer &Out,
                                     bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // Map each wait-form mnemonic to its no-wait replacement.
  const char *Repl = StringSwitch<const char *>(Op.getToken())
    .Case("finit", "fninit")
    .Case("fsave", "fnsave")
    .Case("fstcw", "fnstcw")
    .Case("fstcww", "fnstcw")
    .Case("fstenv", "fnstenv")
    .Case("fstsw", "fnstsw")
    .Case("fstsww", "fnstsw")
    .Case("fclex", "fnclex")
    Inst.setOpcode(X86::WAIT);
    // In inline-asm mode the surrounding machinery owns emission, so only
    // emit the WAIT here when assembling normally.
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Report a Match_MissingFeature failure.  ErrorInfo is a bitmask of required
// subtarget features; emit "instruction requires:" followed by the name of
// each set bit.  Always returns true (an error was reported).
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
                                       bool MatchingInlineAsm) {
  assert(ErrorInfo && "Unknown missing feature!");
  ArrayRef<SMRange> EmptyRanges = None;
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  // Walk the feature mask bit by bit, appending the name of every feature
  // whose bit is set.  (The Mask declaration/advance lines are elided here.)
  for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
    if (ErrorInfo & Mask)
      OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
  return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
// AT&T-syntax matcher.  Strategy: try a direct match of the mnemonic first;
// on success, run processInstruction() to completion and emit.  On an
// invalid-operand or unknown-mnemonic failure, retry with each size suffix
// appended (b/w/l/q for integer ops, s/l/t for 'f'-prefixed FP-stack ops)
// and accept iff exactly one suffixed form matches; otherwise synthesize the
// most specific diagnostic the collected results allow.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
  ArrayRef<SMRange> EmptyRanges = None;
  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
  bool WasOriginallyInvalidOperand = false;
  // First, try a direct match.
  switch (MatchInstructionImpl(Operands, Inst,
                               ErrorInfo, MatchingInlineAsm,
                               isParsingIntelSyntax())) {
  // Some instructions need post-processing to, for example, tweak which
  // encoding is selected. Loop on it while changes happen so the
  // individual transformations can chain off each other.
  if (!MatchingInlineAsm)
    while (processInstruction(Inst, Operands))
  if (!MatchingInlineAsm)
    EmitInstruction(Inst, Operands, Out);
  Opcode = Inst.getOpcode();
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
  case Match_InvalidOperand:
    // Remember this so a later MnemonicFail across all suffixes does not
    // mask the more useful "invalid operand" diagnostic.
    WasOriginallyInvalidOperand = true;
  case Match_MnemonicFail:
  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Op.setTokenValue(Tmp.str());
  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
  // Try the mnemonic with each candidate suffix in turn, recording each
  // result in the Match array.
  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                    MatchingInlineAsm, isParsingIntelSyntax());
    // If this returned as a missing feature failure, remember that.
    if (Match[I] == Match_MissingFeature)
      ErrorInfoMissingFeature = ErrorInfoIgnore;
  // Restore the old token.
  Op.setTokenValue(Base);
  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
  // Otherwise, the match failed, try to produce a decent error message.
  // If we had multiple suffix matches, then identify this as an ambiguous
  if (NumSuccessfulMatches > 1) {
    // Collect the suffix characters that succeeded for the diagnostic.
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];
    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i + 1 == NumMatches)
      OS << "'" << Base << MatchChars[i] << "'";
    Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
  // Okay, we know that none of the variants matched successfully.
  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
    if (!WasOriginallyInvalidOperand) {
      ArrayRef<SMRange> Ranges =
          MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Ranges, MatchingInlineAsm);
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction",
                     EmptyRanges, MatchingInlineAsm);
      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
  // If one instruction matched with a missing feature, report this as a
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = ErrorInfoMissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
  // If one instruction matched with an invalid operand, report this as an
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRanges, MatchingInlineAsm);
// Intel-syntax matcher.  Intel assembly typically omits memory operand
// sizes, so when an unsized memory operand is present we retry the match at
// every plausible operand size (8..512 bits) and require the result to be
// unambiguous; a few instructions instead get an implicit pointer-sized
// operand for gas compatibility.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = Op.getToken();
  ArrayRef<SMRange> EmptyRanges = None;
  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized())
      UnsizedMemOp = X86Op;
  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
  for (const char *Instr : PtrSizedInstrs) {
    if (Mnemonic == Instr) {
      UnsizedMemOp->Mem.Size = getPointerSize();
  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  SmallVector<unsigned, 8> Match;
  uint64_t ErrorInfoMissingFeature = 0;
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      // Record a result only when it changes the outcome (dedup by opcode).
      unsigned LastOpcode = Inst.getOpcode();
      MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                           MatchingInlineAsm, isParsingIntelSyntax());
      if (Match.empty() || LastOpcode != Inst.getOpcode())
      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeature = ErrorInfoIgnore;
    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mneumonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                         isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeature = ErrorInfo;
  // Restore the size of the unsized memory operand if we modified it.
  UnsizedMemOp->Mem.Size = 0;
  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    ArrayRef<SMRange> Ranges =
        MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Ranges, MatchingInlineAsm);
  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
  } else if (NumSuccessfulMatches > 1) {
    // Multiple sizes matched distinct opcodes: the user must disambiguate
    // with an explicit size keyword on the memory operand.
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    ArrayRef<SMRange> Ranges =
        MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 Ranges, MatchingInlineAsm);
  // If one instruction matched with a missing feature, report this as a
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = ErrorInfoMissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
  // If one instruction matched with an invalid operand, report this as an
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
// Returns true for registers this parser omits from inline-asm clobber
// lists — here, exactly the segment registers.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Handle the target-specific directives this parser understands: .word,
// .code16/.code32/.code64, and the .att_syntax/.intel_syntax dialect
// switches.  Returns true on error.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal == ".word")
    // .word emits 2-byte values on x86.
    return ParseDirectiveWord(2, DirectiveID.getLoc());
  else if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // "prefix" is accepted (registers keep their '%'); "noprefix" cannot
      // be honored by this parser.
      if (Parser.getTok().getString() == "prefix")
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    // Dialect 0 is AT&T syntax.
    getParser().setAssemblerDialect(0);
  } else if (IDVal.startswith(".intel_syntax")) {
    // Dialect 1 is Intel syntax.
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // Mirror image of the AT&T case: "noprefix" accepted, "prefix" not.
      if (Parser.getTok().getString() == "noprefix")
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
/// ParseDirectiveWord
///  ::= .word [ expression (, expression)* ]
/// Parses a comma-separated list of expressions and emits each as a value of
/// \p Size bytes via the streamer.  Returns true on error.
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
      const MCExpr *Value;
      if (getParser().parseExpression(Value))

      // Emit this expression at the requested width.
      getParser().getStreamer().EmitValue(Value, Size);

      // End of the list?
      if (getLexer().is(AsmToken::EndOfStatement))

      // FIXME: Improve diagnostic.
      if (getLexer().isNot(AsmToken::Comma)) {
        Error(L, "unexpected token in directive");
/// ParseDirectiveCode
///  ::= .code16 | .code32 | .code64
/// Switches the parser between 16-, 32- and 64-bit code modes and notifies
/// the streamer with the matching assembler flag.  Returns true on error.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (IDVal == ".code16") {
    // Only switch (and emit the mode flag) when not already in this mode.
    if (!is16BitMode()) {
      SwitchMode(X86::Mode16Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
  } else if (IDVal == ".code32") {
    if (!is32BitMode()) {
      SwitchMode(X86::Mode32Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
  } else if (IDVal == ".code64") {
    if (!is64BitMode()) {
      SwitchMode(X86::Mode64Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
    // Any other ".code*" spelling is rejected.
    Error(L, "unknown directive " + IDVal);
// Force static initialization: register this asm parser with both the
// 32-bit and 64-bit x86 targets so the target registry can find it.
extern "C" void LLVMInitializeX86AsmParser() {
  RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
  RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2824 #define GET_REGISTER_MATCHER
2825 #define GET_MATCHER_IMPLEMENTATION
2826 #define GET_SUBTARGET_FEATURE_NAME
2827 #include "X86GenAsmMatcher.inc"