1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table. InfixCalculator::pushOperator() indexes it with
// InfixCalculatorTok values to compare an incoming operator against the
// operator on top of its stack.
42 static const char OpPrecedence[] = {
// X86 target assembly parser, derived from MCTargetAsmParser.
57 class X86AsmParser : public MCTargetAsmParser {
// Instruction info table; bound from the constructor's 'mii' argument.
60 const MCInstrInfo &MII;
// Parse-time instruction info. Its AsmRewrites list is appended to by the
// inline-assembly code paths in this file.
61 ParseInstructionInfo *InstInfo;
// Instrumentation helper; reset in the constructor with the result of
// CreateX86AsmInstrumentation().
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Token-consuming helper returning an SMLoc; callers store the result in their
// 'End' location. It starts by capturing the location of the current token.
64 SMLoc consumeToken() {
65 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: the operand kinds IC_IMM and
// IC_REGISTER, the parentheses IC_LPAREN / IC_RPAREN, and the binary operators
// (IC_PLUS, IC_MINUS, IC_MULTIPLY, IC_DIVIDE, IC_OR, IC_AND, IC_LSHIFT,
// IC_RSHIFT). Values are also used as indices into OpPrecedence.
70 enum InfixCalculatorTok {
// Small infix-expression evaluator. Operands and operators are fed in infix
// order; operators are held on InfixOperatorStack and moved to PostfixStack
// according to OpPrecedence, and the resulting postfix sequence is then
// evaluated to a single int64_t.
85 class InfixCalculator {
// A postfix entry: token kind plus its value (the value is 0 for operators).
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators waiting to be emitted to the postfix sequence.
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Operands and operators in postfix order.
88 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, which must be an operand
// (IC_IMM or IC_REGISTER); both conditions are enforced only by asserts.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
// Appends an operand to the postfix sequence. Op must be IC_IMM or
// IC_REGISTER; Val defaults to 0.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the operator on top of the operator stack without emitting it.
104 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes an operator, first moving operators already on the stack to the
// postfix sequence as dictated by OpPrecedence and parentheses.
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding the operator stack.
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
// Continue only while the stacked operator has at least the precedence of the
// new one, or while inside a parenthesized group.
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are popped and discarded; any other operator is popped and
// appended to the postfix sequence.
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
// Evaluation (invoked as IC.execute() by IntelExprStateMachine::getImm()):
// flush pending operators, then evaluate the postfix sequence.
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
// Standard postfix evaluation: operands are stacked, each operator pops two
// operands (Op2 first, then Op1) and pushes the result as an IC_IMM.
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
// Addition and subtraction accept register operands (no IC_IMM assert).
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// All remaining operators require both operands to be immediates.
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
// NOTE(review): division by zero is guarded only by this assert.
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift amount is not range-checked before shifting.
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine (the IES_* values, e.g. IES_PLUS,
// IES_REGISTER, IES_INTEGER, IES_RBRAC, IES_ERROR).
232 enum IntelExprState {
// State machine driven by ParseIntelExpression() while it walks the tokens of
// an Intel-syntax expression. It records the base register, index register,
// scale and symbol it sees, and feeds the arithmetic part of the expression to
// an InfixCalculator (member IC) to compute the immediate displacement.
252 class IntelExprStateMachine {
// Current state and the state that preceded it.
253 IntelExprState State, PrevState;
// Registers and scale collected so far (0 means "no register").
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Parsing options supplied by the creator of the state machine.
258 bool StopOnLBrac, AddImmPrefix;
// Identifier lookup result used by the inline-assembly paths.
260 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, Scale 1, no symbol, and 'imm' as the
// initial displacement.
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
// Initial displacement plus the value of the expression evaluated by IC.
// Every call re-runs IC.execute().
272 int64_t getImm() { return Imm + IC.execute(); }
// An expression may end only after a closing bracket or an integer.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Operator handlers follow. Each one snapshots State into CurrState on entry
// and stores that snapshot into PrevState on exit.
// Pushes IC_OR (presumably the body of onOr(), called for '|').
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
// Pushes IC_AND (presumably onAnd(), called for '&').
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
// Pushes IC_LSHIFT (presumably onLShift(), called for '<<').
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
// Pushes IC_RSHIFT (presumably onRShift(), called for '>>').
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
// Pushes IC_PLUS (presumably onPlus()). A register that was not part of a
// multiplication is committed here as base or index register.
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
// Minus handling (presumably onMinus()).
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
// Another handler following the same CurrState/PrevState pattern.
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
// Called when a register token is seen. Pushes a placeholder IC_REGISTER
// operand, or, when the register follows 'Integer *', takes that integer as
// the scale.
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
// Called when a symbolic reference is seen. Records the symbol name and
// pushes a zero immediate in its place in the arithmetic expression.
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
// Called when an integer token is seen. The caller reports ErrMsg when this
// returns true, so true presumably signals an error.
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary integer operand.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
// Multiplication: enters IES_MULTIPLY and pushes IC_MULTIPLY.
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
// Division: pushes IC_DIVIDE.
544 IC.pushOperator(IC_DIVIDE);
// A further handler that pushes IC_PLUS.
556 IC.pushOperator(IC_PLUS);
// A handler that, like the plus/minus handlers, commits a pending register as
// base or index register.
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
// Left parenthesis: pushes IC_LPAREN.
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
// Right parenthesis: pushes IC_RPAREN.
628 IC.pushOperator(IC_RPAREN);
// Returns the generic assembly parser this target parser works with.
634 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer obtained from the generic parser.
636 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports an error at location L through the generic parser and returns its
// result. When MatchingInlineAsm is set, no diagnostic is emitted and true is
// returned directly.
638 bool Error(SMLoc L, const Twine &Msg,
639 ArrayRef<SMRange> Ranges = None,
640 bool MatchingInlineAsm = false) {
641 if (MatchingInlineAsm) return true;
642 return Parser.Error(L, Msg, Ranges);
// Same as Error(), but first discards the rest of the current statement.
// The tokens are discarded even when MatchingInlineAsm suppresses the
// diagnostic.
645 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
646 ArrayRef<SMRange> Ranges = None,
647 bool MatchingInlineAsm = false) {
648 Parser.eatToEndOfStatement();
649 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers. Its std::nullptr_t result lets callers
// write 'return ErrorOperand(...)' from functions that return
// std::unique_ptr<X86Operand>.
652 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default memory operands based on the SI / DI register of the current mode.
657 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Operand parsing entry point and its AT&T / Intel syntax variants.
659 std::unique_ptr<X86Operand> ParseOperand();
660 std::unique_ptr<X86Operand> ParseATTOperand();
661 std::unique_ptr<X86Operand> ParseIntelOperand();
// Helpers for Intel-syntax operators, segment overrides, memory operands and
// bracketed expressions.
662 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
663 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
664 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
665 std::unique_ptr<X86Operand>
666 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
667 std::unique_ptr<X86Operand>
668 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
// Resolves an identifier in inline assembly and produces a symbol reference.
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the memory operand for an inline-assembly reference, inserting size
// directive rewrites where needed.
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
// Directive handlers.
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
692 /// instrumentation around Inst.
693 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
696 OperandVector &Operands, MCStreamer &Out,
698 bool MatchingInlineAsm) override;
700 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
702 /// doSrcDstMatch - Returns true if operands are matching in their
703 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
704 /// the parsing mode (Intel vs. AT&T).
705 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
707 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
708 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
709 /// \return \c true if no parsing errors occurred, \c false otherwise.
710 bool HandleAVX512Operand(OperandVector &Operands,
711 const MCParsedAsmOperand &Op);
// Mode predicates: each tests the corresponding X86::Mode*Bit flag in the
// subtarget's feature bits.
713 bool is64BitMode() const {
714 // FIXME: Can tablegen auto-generate this?
715 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
717 bool is32BitMode() const {
718 // FIXME: Can tablegen auto-generate this?
719 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
721 bool is16BitMode() const {
722 // FIXME: Can tablegen auto-generate this?
723 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to the given mode bit (one of X86::Mode64Bit,
// X86::Mode32Bit, X86::Mode16Bit) and refreshes the available-feature set.
// The bits passed to ToggleFeature are the currently set mode bit(s) together
// with the requested one.
// NOTE(review): if 'mode' equals the current mode, the toggle mask is just
// that one bit; presumably this would clear it and trip the assert below.
// Confirm callers only pass a different mode.
725 void SwitchMode(uint64_t mode) {
726 uint64_t oldMode = STI.getFeatureBits() &
727 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
728 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
729 setAvailableFeatures(FB);
// Afterwards exactly the requested mode bit must be set.
730 assert(mode == (STI.getFeatureBits() &
731 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Returns the pointer width in bits (16, 32 or 64) for the current mode.
// Reaching the end means none of the three mode bits is set.
734 unsigned getPointerWidth() {
735 if (is16BitMode()) return 16;
736 if (is32BitMode()) return 32;
737 if (is64BitMode()) return 64;
738 llvm_unreachable("invalid mode");
// True when the parser's assembler dialect is non-zero, which this file
// treats as Intel syntax.
741 bool isParsingIntelSyntax() {
742 return getParser().getAssemblerDialect();
745 /// @name Auto-generated Matcher Functions
// Defining GET_ASSEMBLER_HEADER selects the header portion of the generated
// matcher include, which is pulled into the class body here.
748 #define GET_ASSEMBLER_HEADER
749 #include "X86GenAsmMatcher.inc"
// Binds the subtarget info, generic parser and instruction info, computes the
// initial available-feature set from the subtarget feature bits, and creates
// the instrumentation helper from the target options.
754 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
755 const MCInstrInfo &mii,
756 const MCTargetOptions &Options)
757 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
760 // Initialize the set of available features.
761 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
762 Instrumentation.reset(
763 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser entry points for registers,
// instructions and directives.
766 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
768 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
769 SMLoc NameLoc, OperandVector &Operands) override;
771 bool ParseDirective(AsmToken DirectiveID) override;
773 } // end anonymous namespace
775 /// @name Auto-generated Match Functions
// Maps a register name to its register number; ParseRegister() treats a
// result of 0 as "no match".
778 static unsigned MatchRegisterName(StringRef Name);
// Validates the combination of base and index register of a memory operand.
// When a rule is violated, ErrMsg is set to a description of the problem.
// ParseIntelBracExpression() reports ErrMsg when this returns true, so true
// presumably means "invalid".
782 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
784 // If we have both a base register and an index register make sure they are
785 // both 64-bit or 32-bit registers.
786 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
787 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index register is rejected.
788 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
789 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
790 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
791 IndexReg != X86::RIZ) {
792 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index register is rejected.
795 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
796 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
797 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
798 IndexReg != X86::EIZ){
799 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit, and only the pairings
// BX/BP with SI/DI (in either role) are accepted.
802 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
803 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
804 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
805 ErrMsg = "base register is 16-bit, but index register is not";
808 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
809 IndexReg != X86::SI && IndexReg != X86::DI) ||
810 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
811 IndexReg != X86::BX && IndexReg != X86::BP)) {
812 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register class (16-, 32- or 64-bit). Cases this function
// cannot judge are deliberately left for later diagnostics, per the comments
// below.
820 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
822 // Return true and let a normal complaint about bogus operands happen.
823 if (!Op1.isMem() || !Op2.isMem())
826 // Actually these might be the other way round if Intel syntax is
827 // being used. It doesn't matter.
828 unsigned diReg = Op1.Mem.BaseReg;
829 unsigned siReg = Op2.Mem.BaseReg;
// The register class of siReg decides which class diReg must be in.
831 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
832 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
833 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
834 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
835 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
836 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
837 // Again, return true and let another error happen.
// Parses a register at the current token position.
//   RegNo    - receives the matched register number.
//   StartLoc - receives the location of the first token.
//   EndLoc   - receives the end location of the register text.
// Failure paths return true. In Intel syntax they do so without emitting a
// diagnostic; otherwise they go through Error(). ParseIntelExpression()
// treats a false result as a successfully parsed register.
841 bool X86AsmParser::ParseRegister(unsigned &RegNo,
842 SMLoc &StartLoc, SMLoc &EndLoc) {
844 const AsmToken &PercentTok = Parser.getTok();
845 StartLoc = PercentTok.getLoc();
847 // If we encounter a %, ignore it. This code handles registers with and
848 // without the prefix, unprefixed registers can occur in cfi directives.
849 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
850 Parser.Lex(); // Eat percent token.
852 const AsmToken &Tok = Parser.getTok();
853 EndLoc = Tok.getEndLoc();
855 if (Tok.isNot(AsmToken::Identifier)) {
856 if (isParsingIntelSyntax()) return true;
857 return Error(StartLoc, "invalid register name",
858 SMRange(StartLoc, EndLoc));
861 RegNo = MatchRegisterName(Tok.getString());
863 // If the match failed, try the register name as lowercase.
865 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
867 if (!is64BitMode()) {
868 // FIXME: This should be done using Requires<Not64BitMode> and
869 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
871 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
873 if (RegNo == X86::RIZ ||
874 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
875 X86II::isX86_64NonExtLowByteReg(RegNo) ||
876 X86II::isX86_64ExtendedReg(RegNo))
877 return Error(StartLoc, "register %"
878 + Tok.getString() + " is only available in 64-bit mode",
879 SMRange(StartLoc, EndLoc));
882 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
883 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
885 Parser.Lex(); // Eat 'st'
887 // Check to see if we have '(4)' after %st.
888 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer token in the range 0-7.
893 const AsmToken &IntTok = Parser.getTok();
894 if (IntTok.isNot(AsmToken::Integer))
895 return Error(IntTok.getLoc(), "expected stack index");
896 switch (IntTok.getIntVal()) {
897 case 0: RegNo = X86::ST0; break;
898 case 1: RegNo = X86::ST1; break;
899 case 2: RegNo = X86::ST2; break;
900 case 3: RegNo = X86::ST3; break;
901 case 4: RegNo = X86::ST4; break;
902 case 5: RegNo = X86::ST5; break;
903 case 6: RegNo = X86::ST6; break;
904 case 7: RegNo = X86::ST7; break;
905 default: return Error(IntTok.getLoc(), "invalid stack index");
908 if (getParser().Lex().isNot(AsmToken::RParen))
909 return Error(Parser.getTok().getLoc(), "expected ')'");
911 EndLoc = Parser.getTok().getEndLoc();
912 Parser.Lex(); // Eat ')'
916 EndLoc = Parser.getTok().getEndLoc();
918 // If this is "db[0-7]", match it as an alias
// Only the lowercase spelling "db" is checked here.
920 if (RegNo == 0 && Tok.getString().size() == 3 &&
921 Tok.getString().startswith("db")) {
922 switch (Tok.getString()[2]) {
923 case '0': RegNo = X86::DR0; break;
924 case '1': RegNo = X86::DR1; break;
925 case '2': RegNo = X86::DR2; break;
926 case '3': RegNo = X86::DR3; break;
927 case '4': RegNo = X86::DR4; break;
928 case '5': RegNo = X86::DR5; break;
929 case '6': RegNo = X86::DR6; break;
930 case '7': RegNo = X86::DR7; break;
934 EndLoc = Parser.getTok().getEndLoc();
935 Parser.Lex(); // Eat it.
// Failure exit: silent in Intel syntax, diagnosed otherwise.
941 if (isParsingIntelSyntax()) return true;
942 return Error(StartLoc, "invalid register name",
943 SMRange(StartLoc, EndLoc));
946 Parser.Lex(); // Eat identifier token.
// Creates a memory operand with zero displacement, no segment or index
// register, scale 1 and size 0. The base is RSI, ESI or SI, chosen by the
// current mode. Loc is used as both start and end location.
950 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
952 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
953 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
954 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
955 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Creates a memory operand with zero displacement, no segment or index
// register, scale 1 and size 0. The base is RDI, EDI or DI, chosen by the
// current mode. Loc is used as both start and end location.
958 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
960 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
961 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
962 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
963 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching to the Intel or AT&T operand parser
// according to the active assembler dialect.
966 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
967 if (isParsingIntelSyntax())
968 return ParseIntelOperand();
969 return ParseATTOperand();
972 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword to its size in bits. Only the all-uppercase and
// all-lowercase spellings listed below are recognized; mixed case is not.
973 static unsigned getIntelMemOperandSize(StringRef OpStr) {
974 unsigned Size = StringSwitch<unsigned>(OpStr)
975 .Cases("BYTE", "byte", 8)
976 .Cases("WORD", "word", 16)
977 .Cases("DWORD", "dword", 32)
978 .Cases("QWORD", "qword", 64)
979 .Cases("XWORD", "xword", 80)
980 .Cases("XMMWORD", "xmmword", 128)
981 .Cases("YMMWORD", "ymmword", 256)
982 .Cases("ZMMWORD", "zmmword", 512)
983 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Builds the memory operand for an identifier referenced from inline
// assembly. Two cases are handled:
//  - a symbol reference whose declaration is not a variable (function or
//    label): an absolute memory reference is created;
//  - anything else: a regular memory operand is created, with BaseReg forced
//    to a non-zero placeholder when none was given.
// In both cases an AOK_SizeDirective rewrite may be recorded on
// InstInfo->AsmRewrites.
988 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
989 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
990 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
991 InlineAsmIdentifierInfo &Info) {
992 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
993 // some other label reference.
994 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
995 // Insert an explicit size if the user didn't have one.
// The pointer width of the current mode is used as the size.
997 Size = getPointerWidth();
998 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1002 // Create an absolute memory reference in order to match against
1003 // instructions taking a PC relative operand.
1004 return X86Operand::CreateMem(Disp, Start, End, Size, Identifier,
1008 // We either have a direct symbol reference, or an offset from a symbol. The
1009 // parser always puts the symbol on the LHS, so look there for size
1010 // calculation purposes.
1011 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1013 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1016 Size = Info.Type * 8; // Size is in terms of bits in this context.
1018 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1023 // When parsing inline assembly we set the base register to a non-zero value
1024 // if we don't know the actual value at this time. This is necessary to
1025 // get the matching correct in some cases.
1026 BaseReg = BaseReg ? BaseReg : 1;
1027 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1028 End, Size, Identifier, Info.OpDecl);
// Records the rewrites needed to turn a bracketed Intel expression that
// contains a symbol into its inline-assembly form.
//   AsmRewrites  - list to append to or adjust.
//   SymName      - symbol text; its data pointer is used as a source location.
//   ImmDisp      - displacement parsed before the '['.
//   FinalImmDisp - total displacement computed by the state machine.
//   BracLoc      - location of '['.
//   StartInBrac  - location of the first token after '['.
//   End          - location of ']'.
1032 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1033 StringRef SymName, int64_t ImmDisp,
1034 int64_t FinalImmDisp, SMLoc &BracLoc,
1035 SMLoc &StartInBrac, SMLoc &End) {
1036 // Remove the '[' and ']' from the IR string.
1037 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1038 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1040 // If ImmDisp is non-zero, then we parsed a displacement before the
1041 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1042 // If ImmDisp doesn't match the displacement computed by the state machine
1043 // then we have an additional displacement in the bracketed expression.
1044 if (ImmDisp != FinalImmDisp) {
1046 // We have an immediate displacement before the bracketed expression.
1047 // Adjust this to match the final immediate displacement.
// Scan existing rewrites located at or before '[' for the immediate to
// replace.
1049 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1050 E = AsmRewrites->end(); I != E; ++I) {
1051 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1053 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1054 assert (!Found && "ImmDisp already rewritten.");
// Turn it into an AOK_Imm that spans up to '[' and carries the final value.
1055 (*I).Kind = AOK_Imm;
1056 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1057 (*I).Val = FinalImmDisp;
1062 assert (Found && "Unable to rewrite ImmDisp.");
1065 // We have a symbolic and an immediate displacement, but no displacement
1066 // before the bracketed expression. Put the immediate displacement
1067 // before the bracketed expression.
1068 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1071 // Remove all the ImmPrefix rewrites within the brackets.
1072 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1073 E = AsmRewrites->end(); I != E; ++I) {
1074 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1076 if ((*I).Kind == AOK_ImmPrefix)
1077 (*I).Kind = AOK_Delete;
1079 const char *SymLocPtr = SymName.data();
1080 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and non-zero inside this 'if', so the assert
// below can never fire; its message also says "non-negative" while it tests
// "> 0". The same applies to the second assert further down.
1081 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1082 assert(Len > 0 && "Expected a non-negative length.");
1083 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1085 // Skip everything after the symbol.
1086 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1087 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1088 assert(Len > 0 && "Expected a non-negative length.");
1089 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Walks the tokens of an Intel-syntax expression and feeds each one to the
// state machine SM. End is updated with the location returned by
// consumeToken() as tokens are consumed. Error paths return the result of
// Error().
1093 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1094 const AsmToken &Tok = Parser.getTok();
// Cleared by cases that have already consumed their token(s).
1098 bool UpdateLocLex = true;
1100 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1101 // identifier. Don't try and parse it as a register.
1102 if (Tok.getString().startswith("."))
1105 // If we're parsing an immediate expression, we don't expect a '['.
1106 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1109 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognized token is acceptable only if the expression could end here.
1112 if (SM.isValidEndState()) {
1116 return Error(Tok.getLoc(), "unknown token in expression");
1118 case AsmToken::EndOfStatement: {
1122 case AsmToken::String:
1123 case AsmToken::Identifier: {
1124 // This could be a register or a symbolic displacement.
1127 SMLoc IdentLoc = Tok.getLoc();
1128 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1129 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1130 SM.onRegister(TmpReg);
1131 UpdateLocLex = false;
// Not a register: parse a symbolic expression. Standalone assembly uses the
// generic primary-expression parser.
1134 if (!isParsingInlineAsm()) {
1135 if (getParser().parsePrimaryExpr(Val, End))
1136 return Error(Tok.getLoc(), "Unexpected identifier!");
1138 // This is a dot operator, not an adjacent identifier.
1139 if (Identifier.find('.') != StringRef::npos) {
// Inline assembly resolves the identifier via ParseIntelIdentifier().
1142 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1143 if (ParseIntelIdentifier(Val, Identifier, Info,
1144 /*Unevaluated=*/false, End))
1148 SM.onIdentifierExpr(Val, Identifier);
1149 UpdateLocLex = false;
1152 return Error(Tok.getLoc(), "Unexpected identifier!");
1154 case AsmToken::Integer: {
// For inline assembly, optionally record that this immediate needs a prefix.
1156 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1157 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1159 // Look for 'b' or 'f' following an Integer as a directional label
1160 SMLoc Loc = getTok().getLoc();
1161 int64_t IntVal = getTok().getIntVal();
1162 End = consumeToken();
1163 UpdateLocLex = false;
1164 if (getLexer().getKind() == AsmToken::Identifier) {
1165 StringRef IDVal = getTok().getString();
1166 if (IDVal == "f" || IDVal == "b") {
1168 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1169 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1171 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a symbol that is already defined.
1172 if (IDVal == "b" && Sym->isUndefined())
1173 return Error(Loc, "invalid reference to undefined symbol");
1174 StringRef Identifier = Sym->getName();
1175 SM.onIdentifierExpr(Val, Identifier);
1176 End = consumeToken();
// Integer followed by some other identifier: treat it as a plain integer.
1178 if (SM.onInteger(IntVal, ErrMsg))
1179 return Error(Loc, ErrMsg);
1182 if (SM.onInteger(IntVal, ErrMsg))
1183 return Error(Loc, ErrMsg);
// Operators and brackets map one-to-one onto state machine handlers.
1187 case AsmToken::Plus: SM.onPlus(); break;
1188 case AsmToken::Minus: SM.onMinus(); break;
1189 case AsmToken::Tilde: SM.onNot(); break;
1190 case AsmToken::Star: SM.onStar(); break;
1191 case AsmToken::Slash: SM.onDivide(); break;
1192 case AsmToken::Pipe: SM.onOr(); break;
1193 case AsmToken::Amp: SM.onAnd(); break;
1194 case AsmToken::LessLess:
1195 SM.onLShift(); break;
1196 case AsmToken::GreaterGreater:
1197 SM.onRShift(); break;
1198 case AsmToken::LBrac: SM.onLBrac(); break;
1199 case AsmToken::RBrac: SM.onRBrac(); break;
1200 case AsmToken::LParen: SM.onLParen(); break;
1201 case AsmToken::RParen: SM.onRParen(); break;
1204 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token here unless the case above already did.
1206 if (!Done && UpdateLocLex)
1207 End = consumeToken();
// Parses a bracketed Intel memory expression starting at '[' and builds the
// corresponding memory operand.
//   SegReg  - segment register passed through to the created operand.
//   Start   - start location of the whole operand.
//   ImmDisp - displacement already parsed before the '['.
//   Size    - operand size passed through to the created operand.
1212 std::unique_ptr<X86Operand>
1213 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1214 int64_t ImmDisp, unsigned Size) {
1215 const AsmToken &Tok = Parser.getTok();
1216 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1217 if (getLexer().isNot(AsmToken::LBrac))
1218 return ErrorOperand(BracLoc, "Expected '[' token!");
1219 Parser.Lex(); // Eat '['
1221 SMLoc StartInBrac = Tok.getLoc();
1222 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1223 // may have already parsed an immediate displacement before the bracketed
1225 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1226 if (ParseIntelExpression(SM, End))
// Build the displacement: symbol, immediate, or symbol + immediate.
1229 const MCExpr *Disp = nullptr;
1230 if (const MCExpr *Sym = SM.getSym()) {
1231 // A symbolic displacement.
1233 if (isParsingInlineAsm())
1234 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1235 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// An immediate is needed when it is non-zero or when there is no symbol.
1239 if (SM.getImm() || !Disp) {
1240 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1242 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1244 Disp = Imm; // An immediate displacement only.
1247 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1248 // will in fact do global lookup the field name inside all global typedefs,
1249 // but we don't emulate that.
1250 if (Tok.getString().find('.') != StringRef::npos) {
1251 const MCExpr *NewDisp;
1252 if (ParseIntelDotOperator(Disp, NewDisp))
1255 End = Tok.getEndLoc();
1256 Parser.Lex(); // Eat the field.
// The state machine's unsigned register/scale values are held as int here.
1260 int BaseReg = SM.getBaseReg();
1261 int IndexReg = SM.getIndexReg();
1262 int Scale = SM.getScale();
// Standalone assembly: create the operand directly, validating the register
// pair when a base or index register is present.
1263 if (!isParsingInlineAsm()) {
1265 if (!BaseReg && !IndexReg) {
1267 return X86Operand::CreateMem(Disp, Start, End, Size);
1269 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1272 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1273 Error(StartInBrac, ErrMsg);
1276 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: defer to CreateMemForInlineAsm with the identifier info.
1280 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1281 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1282 End, Size, SM.getSymName(), Info);
1285 // Inline assembly may use variable names with namespace alias qualifiers.
/// Look up an identifier through the frontend callback while parsing inline
/// assembly and create a symbol reference expression for it.
///
/// \param Val        [out] Symbol reference created for the identifier.
/// \param Identifier [in,out] On entry, its data pointer marks where the lookup
///                   text starts; it is reassigned to the looked-up text.
/// \param Info       Passed to SemaCallback->LookupInlineAsmIdentifier.
/// \param IsUnevaluatedOperand Passed through to the lookup callback.
/// \param End        [out] End location of the last token examined.
1286 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1287 StringRef &Identifier,
1288 InlineAsmIdentifierInfo &Info,
1289 bool IsUnevaluatedOperand, SMLoc &End) {
1290 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw data pointer, not from Identifier's length.
// NOTE(review): presumably the callback shrinks LineBuf to the text it
// claimed; LineBuf.size() is relied on below - confirm against the callback.
1293 StringRef LineBuf(Identifier.data());
1294 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1296 const AsmToken &Tok = Parser.getTok();
1298 // Advance the token stream until the end of the current token is
1299 // after the end of what the frontend claimed.
1300 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1302 End = Tok.getEndLoc();
// The claimed text must end exactly on a token boundary.
1305 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1306 if (End.getPointer() == EndPtr) break;
1309 // Create the symbol reference.
1310 Identifier = LineBuf;
1311 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1312 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1313 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1317 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon this accepts an optional integer displacement, then either
/// a bracketed expression or a plain expression / identifier.
///
/// \param SegReg Segment register that was already parsed; must be non-zero.
/// \param Start  Start location forwarded to the created operand.
1318 std::unique_ptr<X86Operand>
1319 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1321 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1322 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1323 if (Tok.isNot(AsmToken::Colon))
1324 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1325 Parser.Lex(); // Eat ':'
// Optional immediate displacement directly after the colon.
1327 int64_t ImmDisp = 0;
1328 if (getLexer().is(AsmToken::Integer)) {
1329 ImmDisp = Tok.getIntVal();
1330 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record an immediate-prefix rewrite at the integer token.
1332 if (isParsingInlineAsm())
1333 InstInfo->AsmRewrites->push_back(
1334 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1336 if (getLexer().isNot(AsmToken::LBrac)) {
1337 // An immediate following a 'segment register', 'colon' token sequence can
1338 // be followed by a bracketed expression. If it isn't we know we have our
1339 // final segment override.
1340 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1341 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1342 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// A '[' hands the rest over to the bracketed-expression parser.
1347 if (getLexer().is(AsmToken::LBrac))
1348 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Otherwise: a primary expression outside inline asm ...
1352 if (!isParsingInlineAsm()) {
1353 if (getParser().parsePrimaryExpr(Val, End))
1354 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1356 return X86Operand::CreateMem(Val, Start, End, Size);
// ... or an identifier resolved through the frontend when in inline asm.
1359 InlineAsmIdentifierInfo Info;
1360 StringRef Identifier = Tok.getString();
1361 if (ParseIntelIdentifier(Val, Identifier, Info,
1362 /*Unevaluated=*/false, End))
1364 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1365 /*Scale=*/1, Start, End, Size, Identifier, Info);
1368 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// A leading '[' is delegated to ParseIntelBracExpression together with
/// ImmDisp. Without a bracket, ImmDisp is asserted to be zero and the operand
/// is a primary expression (outside inline asm) or an identifier with an
/// optional "[ ImmDisp ]" suffix (inline asm).
1369 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1372 const AsmToken &Tok = Parser.getTok();
1375 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1376 if (getLexer().is(AsmToken::LBrac))
1377 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// A displacement is only expected in front of a bracketed expression.
1378 assert(ImmDisp == 0);
1381 if (!isParsingInlineAsm()) {
1382 if (getParser().parsePrimaryExpr(Val, End))
1383 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1385 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1388 InlineAsmIdentifierInfo Info;
1389 StringRef Identifier = Tok.getString();
1390 if (ParseIntelIdentifier(Val, Identifier, Info,
1391 /*Unevaluated=*/false, End))
// No '[' after the identifier: the identifier alone is the operand.
1394 if (!getLexer().is(AsmToken::LBrac))
1395 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1396 /*Scale=*/1, Start, End, Size, Identifier, Info);
1398 Parser.Lex(); // Eat '['
1400 // Parse Identifier [ ImmDisp ]
1401 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1402 /*AddImmPrefix=*/false);
1403 if (ParseIntelExpression(SM, End))
// Only an immediate may appear inside the brackets of a variable reference:
// further symbols, base registers and index registers are rejected.
1407 Error(Start, "cannot use more than one symbol in memory operand");
1410 if (SM.getBaseReg()) {
1411 Error(Start, "cannot use base register with variable reference");
1414 if (SM.getIndexReg()) {
1415 Error(Start, "cannot use index register with variable reference");
1419 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1420 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1421 // we're pointing to a local variable in memory, so the base register is
1422 // really the frame or stack pointer.
1423 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1424 /*Scale=*/1, Start, End, Size, Identifier,
1428 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to a constant displacement.
///
/// \param Disp    Existing displacement; must be an MCConstantExpr.
/// \param NewDisp [out] Constant expression holding the old value plus the
///                offset taken from the current token.
/// \returns the result of Error() when Disp is not constant, when the field
///          lookup fails, or when the token is of an unexpected kind.
1429 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1430 const MCExpr *&NewDisp) {
1431 const AsmToken &Tok = Parser.getTok();
1432 int64_t OrigDispVal, DotDispVal;
1434 // FIXME: Handle non-constant expressions.
1435 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1436 OrigDispVal = OrigDisp->getValue();
1438 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1440 // Drop the optional '.'.
1441 StringRef DotDispStr = Tok.getString();
1442 if (DotDispStr.startswith("."))
1443 DotDispStr = DotDispStr.drop_front(1);
1445 // .Imm gets lexed as a real.
1446 if (Tok.is(AsmToken::Real)) {
// Numeric offset: the text after the dot is read as a base-10 integer.
1448 DotDispStr.getAsInteger(10, DotDisp);
1449 DotDispVal = DotDisp.getZExtValue();
1450 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Named field: split at the first '.' into two parts and ask the frontend
// for the field offset.
1452 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1453 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1455 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1456 DotDispVal = DotDisp;
1458 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm identifiers, record a rewrite carrying the combined offset.
1460 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1461 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1462 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here - confirm that
// large or negative offsets cannot reach this point.
1463 unsigned Val = OrigDispVal + DotDispVal;
1464 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1468 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1472 /// Parse the 'offset' operator. This operator is used to specify the
1473 /// location rather than the content of a variable.
///
/// The operator token is consumed, the following identifier is resolved via
/// ParseIntelIdentifier, and a register operand tagged with the identifier is
/// returned.
1474 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1475 const AsmToken &Tok = Parser.getTok();
1476 SMLoc OffsetOfLoc = Tok.getLoc();
1477 Parser.Lex(); // Eat offset.
1480 InlineAsmIdentifierInfo Info;
1481 SMLoc Start = Tok.getLoc(), End;
1482 StringRef Identifier = Tok.getString();
1483 if (ParseIntelIdentifier(Val, Identifier, Info,
1484 /*Unevaluated=*/false, End))
1487 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one following
// character - confirm.
1488 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1490 // The offset operator will have an 'r' constraint, thus we need to create
1491 // register operand to ensure proper matching. Just pick a GPR based on
1492 // the size of a pointer.
1494 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1495 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1496 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator evaluates; IOK_LENGTH,
/// IOK_SIZE and IOK_TYPE are the values used by that function and its callers.
1499 enum IntelOperatorKind {
1505 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1506 /// returns the number of elements in an array. It returns the value 1 for
1507 /// non-array variables. The SIZE operator returns the size of a C or C++
1508 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1509 /// TYPE operator returns the size of a C or C++ type or variable. If the
1510 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
/// \returns an immediate operand holding the value read from the identifier's
///          lookup info, or the result of ErrorOperand() on lookup failure.
1511 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1512 const AsmToken &Tok = Parser.getTok();
1513 SMLoc TypeLoc = Tok.getLoc();
1514 Parser.Lex(); // Eat operator.
1516 const MCExpr *Val = nullptr;
1517 InlineAsmIdentifierInfo Info;
1518 SMLoc Start = Tok.getLoc(), End;
1519 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1520 if (ParseIntelIdentifier(Val, Identifier, Info,
1521 /*Unevaluated=*/true, End))
1525 return ErrorOperand(Start, "unable to lookup expression");
// Pick the requested property from the lookup result.
1529 default: llvm_unreachable("Unexpected operand kind!");
1530 case IOK_LENGTH: CVal = Info.Length; break;
1531 case IOK_SIZE: CVal = Info.Size; break;
1532 case IOK_TYPE: CVal = Info.Type; break;
1535 // Rewrite the type operator and the C or C++ type or variable in terms of an
1536 // immediate. E.g. TYPE foo -> $$4
// The rewrite spans from the operator keyword to the end of the identifier.
1537 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1538 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1540 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1541 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// Handles, in order: the inline-asm-only offset/length/size/type operators,
/// an optional operand size followed by PTR/ptr, immediate expressions
/// (optionally followed by a bracketed expression), registers and segment
/// overrides, and finally plain memory operands.
1544 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1545 const AsmToken &Tok = Parser.getTok();
1548 // Offset, length, type and size operators.
// Only all-lowercase and all-uppercase spellings are recognized.
1549 if (isParsingInlineAsm()) {
1550 StringRef AsmTokStr = Tok.getString();
1551 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1552 return ParseIntelOffsetOfOperator();
1553 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1554 return ParseIntelOperator(IOK_LENGTH);
1555 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1556 return ParseIntelOperator(IOK_SIZE);
1557 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1558 return ParseIntelOperator(IOK_TYPE);
// Optional operand size keyword, which must be followed by PTR or ptr.
1561 unsigned Size = getIntelMemOperandSize(Tok.getString());
1563 Parser.Lex(); // Eat operand size (e.g., byte, word).
1564 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1565 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1566 Parser.Lex(); // Eat ptr.
1568 Start = Tok.getLoc();
// An integer, '-', '~' or '(' starts an immediate expression.
1571 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1572 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy of the first token, used below to tell single-token immediates from
// complex expressions.
1573 AsmToken StartTok = Tok;
1574 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1575 /*AddImmPrefix=*/false);
1576 if (ParseIntelExpression(SM, End))
1579 int64_t Imm = SM.getImm();
1580 if (isParsingInlineAsm()) {
// Length of source text between Start and the current token.
1581 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1582 if (StartTok.getString().size() == Len)
1583 // Just add a prefix if this wasn't a complex immediate expression.
1584 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1586 // Otherwise, rewrite the complex expression as a single immediate.
1587 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1590 if (getLexer().isNot(AsmToken::LBrac)) {
1591 // If a directional label (ie. 1f or 2b) was parsed above from
1592 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1593 // to the MCExpr with the directional local symbol and this is a
1594 // memory operand not an immediate operand.
1596 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1598 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1599 return X86Operand::CreateImm(ImmExpr, Start, End);
1602 // Only positive immediates are valid.
1604 return ErrorOperand(Start, "expected a positive immediate displacement "
1605 "before bracketed expr.");
1607 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1608 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister returning false means a register was parsed.
1613 if (!ParseRegister(RegNo, Start, End)) {
1614 // If this is a segment register followed by a ':', then this is the start
1615 // of a segment override, otherwise this is a normal register reference.
1616 if (getLexer().isNot(AsmToken::Colon))
1617 return X86Operand::CreateReg(RegNo, Start, End);
1619 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand without a leading displacement.
1623 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the kind of the
/// current token: '%' introduces a register (or a segment override when
/// followed by ':'), '$' introduces an immediate, and the first switch arm
/// parses a memory operand without a segment register.
1626 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1627 switch (getLexer().getKind()) {
1629 // Parse a memory operand with no segment register.
1630 return ParseMemOperand(0, Parser.getTok().getLoc());
1631 case AsmToken::Percent: {
1632 // Read the register.
1635 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers EIZ/RIZ are rejected as standalone operands.
1636 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1637 Error(Start, "%eiz and %riz can only be used as index registers",
1638 SMRange(Start, End));
1642 // If this is a segment register followed by a ':', then this is the start
1643 // of a memory reference, otherwise this is a normal register reference.
1644 if (getLexer().isNot(AsmToken::Colon))
1645 return X86Operand::CreateReg(RegNo, Start, End);
// Only members of the segment register class may precede ':'.
1647 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1648 return ErrorOperand(Start, "invalid segment register");
1650 getParser().Lex(); // Eat the colon.
1651 return ParseMemOperand(RegNo, Start);
1653 case AsmToken::Dollar: {
1654 // $42 -> immediate.
1655 SMLoc Start = Parser.getTok().getLoc(), End;
1658 if (getParser().parseExpression(Val, End))
1660 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand: a memory
/// broadcast "{1to<NUM>}", or a mask register in braces optionally followed by
/// "{z}". The resulting tokens and operands are appended to Operands.
///
/// Nothing is parsed unless the subtarget has FeatureAVX512 and the current
/// token is '{'.
///
/// \param Operands Operand list being built for the current instruction.
/// \param Op       NOTE(review): not read in the lines below; the local Op in
///                 the mask register branch shadows it.
/// \returns the negated result of ErrorAndEatStatement() on a parse error.
///          NOTE(review): presumably false means failure, since the caller
///          tests the negated result - confirm.
1665 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1666 const MCParsedAsmOperand &Op) {
1667 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1668 if (getLexer().is(AsmToken::LCurly)) {
1669 // Eat "{" and mark the current place.
1670 const SMLoc consumedToken = consumeToken();
1671 // Distinguish {1to<NUM>} from {%k<NUM>}.
1672 if(getLexer().is(AsmToken::Integer)) {
1673 // Parse memory broadcasting ({1to<NUM>}).
1674 if (getLexer().getTok().getIntVal() != 1)
1675 return !ErrorAndEatStatement(getLexer().getLoc(),
1676 "Expected 1to<NUM> at this point");
1677 Parser.Lex(); // Eat "1" of 1to8
// The rest of the primitive must be an identifier starting with "to".
1678 if (!getLexer().is(AsmToken::Identifier) ||
1679 !getLexer().getTok().getIdentifier().startswith("to"))
1680 return !ErrorAndEatStatement(getLexer().getLoc(),
1681 "Expected 1to<NUM> at this point");
1682 // Recognize only reasonable suffixes.
1683 const char *BroadcastPrimitive =
1684 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1685 .Case("to2", "{1to2}")
1686 .Case("to4", "{1to4}")
1687 .Case("to8", "{1to8}")
1688 .Case("to16", "{1to16}")
1690 if (!BroadcastPrimitive)
1691 return !ErrorAndEatStatement(getLexer().getLoc(),
1692 "Invalid memory broadcast primitive.");
1693 Parser.Lex(); // Eat "toN" of 1toN
1694 if (!getLexer().is(AsmToken::RCurly))
1695 return !ErrorAndEatStatement(getLexer().getLoc(),
1696 "Expected } at this point");
1697 Parser.Lex(); // Eat "}"
// The whole broadcast primitive is emitted as one token.
1698 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1700 // No AVX512 specific primitives can pass
1701 // after memory broadcasting, so return.
1704 // Parse mask register {%k1}
// Emitted as three entries: "{", the parsed operand, and "}".
1705 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1706 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1707 Operands.push_back(std::move(Op));
1708 if (!getLexer().is(AsmToken::RCurly))
1709 return !ErrorAndEatStatement(getLexer().getLoc(),
1710 "Expected } at this point");
1711 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1713 // Parse "zeroing non-masked" semantic {z}
1714 if (getLexer().is(AsmToken::LCurly)) {
// The "{z}" token is pushed as soon as '{' is consumed, before the "z" and
// '}' that follow are validated.
1715 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1716 if (!getLexer().is(AsmToken::Identifier) ||
1717 getLexer().getTok().getIdentifier() != "z")
1718 return !ErrorAndEatStatement(getLexer().getLoc(),
1719 "Expected z at this point");
1720 Parser.Lex(); // Eat the z
1721 if (!getLexer().is(AsmToken::RCurly))
1722 return !ErrorAndEatStatement(getLexer().getLoc(),
1723 "Expected } at this point");
1724 Parser.Lex(); // Eat the }
1733 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1734 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-parsed prefix, or 0 when
///               there was none.
///
/// The displacement defaults to the constant 0. Base register, index register
/// and scale are optional; the scale must be 1, 2, 4 or 8, and 16-bit
/// registers are subject to the extra checks near the end of the function.
1735 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1738 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1739 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1740 // only way to do this without lookahead is to eat the '(' and see what is
1742 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
// Case 1: the operand does not start with '(', so it starts with a
// displacement expression.
1743 if (getLexer().isNot(AsmToken::LParen)) {
1745 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1747 // After parsing the base expression we could either have a parenthesized
1748 // memory address or not. If not, return now. If so, eat the (.
1749 if (getLexer().isNot(AsmToken::LParen)) {
1750 // Unless we have a segment register, treat this as an immediate.
1752 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1753 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
// Case 2: the operand starts with '('.
1759 // Okay, we have a '('. We don't know if this is an expression or not, so
1760 // we have to eat the ( to see beyond it.
1761 SMLoc LParenLoc = Parser.getTok().getLoc();
1762 Parser.Lex(); // Eat the '('.
1764 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1765 // Nothing to do here, fall into the code below with the '(' part of the
1766 // memory operand consumed.
1770 // It must be a parenthesized expression, parse it now.
1771 if (getParser().parseParenExpression(Disp, ExprEnd))
1774 // After parsing the base expression we could either have a parenthesized
1775 // memory address or not. If not, return now. If so, eat the (.
1776 if (getLexer().isNot(AsmToken::LParen)) {
1777 // Unless we have a segment register, treat this as an immediate.
1779 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1780 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1788 // If we reached here, then we just ate the ( of the memory operand. Process
1789 // the rest of the memory operand.
1790 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1791 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1793 if (getLexer().is(AsmToken::Percent)) {
1794 SMLoc StartLoc, EndLoc;
1795 BaseLoc = Parser.getTok().getLoc();
1796 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1797 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1798 Error(StartLoc, "eiz and riz can only be used as index registers",
1799 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1804 if (getLexer().is(AsmToken::Comma)) {
1805 Parser.Lex(); // Eat the comma.
1806 IndexLoc = Parser.getTok().getLoc();
1808 // Following the comma we should have either an index register, or a scale
1809 // value. We don't support the latter form, but we want to parse it
1812 // Note that even though it would be completely consistent to support syntax
1813 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1814 if (getLexer().is(AsmToken::Percent)) {
1816 if (ParseRegister(IndexReg, L, L)) return nullptr;
1818 if (getLexer().isNot(AsmToken::RParen)) {
1819 // Parse the scale amount:
1820 // ::= ',' [scale-expression]
1821 if (getLexer().isNot(AsmToken::Comma)) {
1822 Error(Parser.getTok().getLoc(),
1823 "expected comma in scale expression");
1826 Parser.Lex(); // Eat the comma.
// An empty scale (") right after the comma") keeps the default Scale of 1.
1828 if (getLexer().isNot(AsmToken::RParen)) {
1829 SMLoc Loc = Parser.getTok().getLoc();
1832 if (getParser().parseAbsoluteExpression(ScaleVal)){
1833 Error(Loc, "expected scale expression");
1837 // Validate the scale amount.
1838 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1840 Error(Loc, "scale factor in 16-bit address must be 1");
1843 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1844 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1847 Scale = (unsigned)ScaleVal;
1850 } else if (getLexer().isNot(AsmToken::RParen)) {
1851 // A scale amount without an index is ignored.
1853 SMLoc Loc = Parser.getTok().getLoc();
1856 if (getParser().parseAbsoluteExpression(Value))
1860 Warning(Loc, "scale factor without index register is ignored");
1865 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1866 if (getLexer().isNot(AsmToken::RParen)) {
1867 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1870 SMLoc MemEnd = Parser.getTok().getEndLoc();
1871 Parser.Lex(); // Eat the ')'.
1873 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1874 // and then only in non-64-bit modes. Except for DX, which is a special case
1875 // because an unofficial form of in/out instructions uses it.
1876 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1877 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1878 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1879 BaseReg != X86::DX) {
1880 Error(BaseLoc, "invalid 16-bit base register");
1884 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1885 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index register pair.
1890 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1891 Error(BaseLoc, ErrMsg);
// Use the full form when any register is involved; otherwise the operand is
// created from the displacement alone.
1895 if (SegReg || BaseReg || IndexReg)
1896 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1898 return X86Operand::CreateMem(Disp, MemStart, MemEnd);
/// Parse one instruction statement into Operands.
///
/// The mnemonic (after the setCCb and cmpCC name hacks below) is pushed as the
/// first token, followed by the parsed operands. A series of mnemonic-specific
/// fixups then rewrites or extends the operand list: in/out with "(%dx)",
/// default operands for string instructions, shifts and rotates by 1, and
/// "int $3".
///
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Location used for the mnemonic token and for all
///                 synthesized default operands.
/// \param Operands [out] Receives the mnemonic token and the operands.
/// \returns the result of Error() / ErrorAndEatStatement() on the error paths
///          visible below.
1901 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1902 SMLoc NameLoc, OperandVector &Operands) {
1904 StringRef PatchedName = Name;
1906 // FIXME: Hack to recognize setneb as setne.
// "setb" and "setnb" are kept as they are; any other set...b loses its
// trailing 'b'.
1907 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1908 PatchedName != "setb" && PatchedName != "setnb")
1909 PatchedName = PatchedName.substr(0, Name.size()-1);
1911 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1912 const MCExpr *ExtraImmOp = nullptr;
1913 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1914 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1915 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1916 bool IsVCMP = PatchedName[0] == 'v';
// The comparison code sits between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1917 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1918 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1919 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1923 .Case("unord", 0x03)
1928 /* AVX only from here */
1929 .Case("eq_uq", 0x08)
1932 .Case("false", 0x0B)
1933 .Case("neq_oq", 0x0C)
1937 .Case("eq_os", 0x10)
1938 .Case("lt_oq", 0x11)
1939 .Case("le_oq", 0x12)
1940 .Case("unord_s", 0x13)
1941 .Case("neq_us", 0x14)
1942 .Case("nlt_uq", 0x15)
1943 .Case("nle_uq", 0x16)
1944 .Case("ord_s", 0x17)
1945 .Case("eq_us", 0x18)
1946 .Case("nge_uq", 0x19)
1947 .Case("ngt_uq", 0x1A)
1948 .Case("false_os", 0x1B)
1949 .Case("neq_os", 0x1C)
1950 .Case("ge_oq", 0x1D)
1951 .Case("gt_oq", 0x1E)
1952 .Case("true_us", 0x1F)
// ~0U means the code was not recognized. Codes of 8 and above are accepted
// only for the "v"-prefixed mnemonics.
1954 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1955 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1956 getParser().getContext());
// Replace the mnemonic with the generic compare of the same type suffix.
1957 if (PatchedName.endswith("ss")) {
1958 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1959 } else if (PatchedName.endswith("sd")) {
1960 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1961 } else if (PatchedName.endswith("ps")) {
1962 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1964 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1965 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1970 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison code immediate is added before the parsed
// operands; in Intel syntax it is added after them (further below).
1972 if (ExtraImmOp && !isParsingIntelSyntax())
1973 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1975 // Determine whether this is an instruction prefix.
1977 Name == "lock" || Name == "rep" ||
1978 Name == "repe" || Name == "repz" ||
1979 Name == "repne" || Name == "repnz" ||
1980 Name == "rex64" || Name == "data16";
1983 // This does the actual operand parsing. Don't parse any more if we have a
1984 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1985 // just want to parse the "lock" as the first instruction and the "incl" as
1987 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1989 // Parse '*' modifier.
1990 if (getLexer().is(AsmToken::Star))
1991 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1993 // Read the operands.
// Each parsed operand may be followed by AVX-512 decorations.
1995 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1996 Operands.push_back(std::move(Op));
1997 if (!HandleAVX512Operand(Operands, *Operands.back()))
2000 Parser.eatToEndOfStatement();
2003 // check for comma and eat it
2004 if (getLexer().is(AsmToken::Comma))
2010 if (getLexer().isNot(AsmToken::EndOfStatement))
2011 return ErrorAndEatStatement(getLexer().getLoc(),
2012 "unexpected token in argument list");
2015 // Consume the EndOfStatement or the prefix separator Slash
2016 if (getLexer().is(AsmToken::EndOfStatement) ||
2017 (isPrefix && getLexer().is(AsmToken::Slash)))
2020 if (ExtraImmOp && isParsingIntelSyntax())
2021 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2023 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2024 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2025 // documented form in various unofficial manuals, so a lot of code uses it.
2026 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2027 Operands.size() == 3) {
2028 X86Operand &Op = (X86Operand &)*Operands.back();
// Matches only a memory operand of the exact form (%dx): no segment, zero
// constant displacement, base register dx, no index register.
2029 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2030 isa<MCConstantExpr>(Op.Mem.Disp) &&
2031 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2032 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2033 SMLoc Loc = Op.getEndLoc();
2034 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2037 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2038 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2039 Operands.size() == 3) {
2040 X86Operand &Op = (X86Operand &)*Operands[1];
2041 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2042 isa<MCConstantExpr>(Op.Mem.Disp) &&
2043 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2044 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2045 SMLoc Loc = Op.getEndLoc();
2046 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2050 // Append default arguments to "ins[bwld]"
2051 if (Name.startswith("ins") && Operands.size() == 1 &&
2052 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): both syntax branches below push DX and then the DI memory
// operand, in the same order - confirm whether Intel syntax should differ.
2054 if (isParsingIntelSyntax()) {
2055 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2056 Operands.push_back(DefaultMemDIOperand(NameLoc));
2058 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2059 Operands.push_back(DefaultMemDIOperand(NameLoc));
2063 // Append default arguments to "outs[bwld]"
2064 if (Name.startswith("outs") && Operands.size() == 1 &&
2065 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2066 Name == "outsd" )) {
// NOTE(review): both syntax branches below push the SI memory operand and
// then DX, in the same order - confirm whether Intel syntax should differ.
2067 if (isParsingIntelSyntax()) {
2068 Operands.push_back(DefaultMemSIOperand(NameLoc));
2069 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2071 Operands.push_back(DefaultMemSIOperand(NameLoc));
2072 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2076 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2077 // values of $SIREG according to the mode. It would be nice if this
2078 // could be achieved with InstAlias in the tables.
2079 if (Name.startswith("lods") && Operands.size() == 1 &&
2080 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2081 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2082 Operands.push_back(DefaultMemSIOperand(NameLoc));
2084 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2085 // values of $DIREG according to the mode. It would be nice if this
2086 // could be achieved with InstAlias in the tables.
2087 if (Name.startswith("stos") && Operands.size() == 1 &&
2088 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2089 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2090 Operands.push_back(DefaultMemDIOperand(NameLoc));
2092 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2093 // values of $DIREG according to the mode. It would be nice if this
2094 // could be achieved with InstAlias in the tables.
2095 if (Name.startswith("scas") && Operands.size() == 1 &&
2096 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2097 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2098 Operands.push_back(DefaultMemDIOperand(NameLoc));
2100 // Add default SI and DI operands to "cmps[bwlq]".
2101 if (Name.startswith("cmps") &&
2102 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2103 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: append the defaults in the order of the active
// syntax (Intel: SI then DI; otherwise DI then SI).
2104 if (Operands.size() == 1) {
2105 if (isParsingIntelSyntax()) {
2106 Operands.push_back(DefaultMemSIOperand(NameLoc));
2107 Operands.push_back(DefaultMemDIOperand(NameLoc));
2109 Operands.push_back(DefaultMemDIOperand(NameLoc));
2110 Operands.push_back(DefaultMemSIOperand(NameLoc));
// Two explicit operands: they must pass doSrcDstMatch.
2112 } else if (Operands.size() == 3) {
2113 X86Operand &Op = (X86Operand &)*Operands[1];
2114 X86Operand &Op2 = (X86Operand &)*Operands[2];
2115 if (!doSrcDstMatch(Op, Op2))
2116 return Error(Op.getStartLoc(),
2117 "mismatching source and destination index registers");
2121 // Add default SI and DI operands to "movs[bwlq]".
2122 if ((Name.startswith("movs") &&
2123 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2124 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2125 (Name.startswith("smov") &&
2126 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2127 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2128 if (Operands.size() == 1) {
// An operand-less "movsd" has its mnemonic token replaced with "movsl".
2129 if (Name == "movsd")
2130 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
// Defaults in the order of the active syntax (Intel: DI then SI; otherwise
// SI then DI).
2131 if (isParsingIntelSyntax()) {
2132 Operands.push_back(DefaultMemDIOperand(NameLoc));
2133 Operands.push_back(DefaultMemSIOperand(NameLoc));
2135 Operands.push_back(DefaultMemSIOperand(NameLoc));
2136 Operands.push_back(DefaultMemDIOperand(NameLoc));
2138 } else if (Operands.size() == 3) {
2139 X86Operand &Op = (X86Operand &)*Operands[1];
2140 X86Operand &Op2 = (X86Operand &)*Operands[2];
2141 if (!doSrcDstMatch(Op, Op2))
2142 return Error(Op.getStartLoc(),
2143 "mismatching source and destination index registers");
2147 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2149 if ((Name.startswith("shr") || Name.startswith("sar") ||
2150 Name.startswith("shl") || Name.startswith("sal") ||
2151 Name.startswith("rcl") || Name.startswith("rcr") ||
2152 Name.startswith("rol") || Name.startswith("ror")) &&
2153 Operands.size() == 3) {
// The count operand is last in Intel syntax and first otherwise; a constant
// count of 1 is dropped from the operand list.
2154 if (isParsingIntelSyntax()) {
2156 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2157 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2158 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2159 Operands.pop_back();
2161 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2162 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2163 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2164 Operands.erase(Operands.begin() + 1);
2168 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2169 // instalias with an immediate operand yet.
2170 if (Name == "int" && Operands.size() == 2) {
2171 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2172 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2173 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
// Drop the immediate and rename the mnemonic token.
2174 Operands.erase(Operands.begin() + 1);
2175 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build a replacement instruction in TmpInst with opcode Opcode, register
/// operands for Reg, and Inst's operand 0 as the final operand.
///
/// \param Inst   Instruction whose operand 0 is reused.
/// \param Opcode Opcode given to the replacement instruction.
/// \param Reg    Register used for the register operands.
2182 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2185 TmpInst.setOpcode(Opcode);
2187 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2188 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2189 TmpInst.addOperand(Inst.getOperand(0));
/// Rewrites \p Inst to \p Opcode through convertToSExti8 using register
/// X86::AX, forwarding \p isCmp (default false).
///
/// The guard tests whether operand 0 is not an immediate, or is an immediate
/// rejected by isImmSExti16i8Value. NOTE(review): the statement controlled by
/// that guard is missing from this listing (presumably an early
/// `return false` -- confirm).
2194 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2195 bool isCmp = false) {
2196 if (!Inst.getOperand(0).isImm() ||
2197 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2200 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Rewrites \p Inst to \p Opcode through convertToSExti8 using register
/// X86::EAX, forwarding \p isCmp (default false).
///
/// The guard tests whether operand 0 is not an immediate, or is an immediate
/// rejected by isImmSExti32i8Value. NOTE(review): the statement controlled by
/// that guard is missing from this listing (presumably an early
/// `return false` -- confirm).
2203 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2204 bool isCmp = false) {
2205 if (!Inst.getOperand(0).isImm() ||
2206 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2209 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Rewrites \p Inst to \p Opcode through convertToSExti8 using register
/// X86::RAX, forwarding \p isCmp (default false).
///
/// The guard tests whether operand 0 is not an immediate, or is an immediate
/// rejected by isImmSExti64i8Value. NOTE(review): the statement controlled by
/// that guard is missing from this listing (presumably an early
/// `return false` -- confirm).
2212 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2213 bool isCmp = false) {
2214 if (!Inst.getOperand(0).isImm() ||
2215 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2218 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-processes a matched instruction by switching on its opcode.
///
/// Visible behavior:
///  - Opcodes without a case return false.
///  - The AND/XOR/OR/CMP/ADD/SUB/ADC/SBB forms named *16i16, *32i32 and *64i32
///    return the result of the matching convert*to*ri8 helper, which is handed
///    the corresponding *ri8 opcode. Only the CMP cases pass isCmp = true.
///  - The VMOV*rr cases test X86II::isX86_64ExtendedReg on two operands and
///    then set the opcode to the same-named *_REV opcode.
///
/// \p Ops is not referenced in the lines visible here.
///
/// NOTE(review): this listing omits lines (see the gaps in the embedded
/// numbering), including the statements controlled by the two register
/// guards, the declaration of NewOpc, at least one case label, the returns
/// after setOpcode, and the closing braces. Confirm against the full file.
2221 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2222 switch (Inst.getOpcode()) {
2223 default: return false;
2224 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2225 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2226 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2227 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2228 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2229 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2230 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2231 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2232 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// CMP is the only group that passes isCmp = true to the helpers.
2233 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2234 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2235 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2236 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2237 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2238 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2239 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2240 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2241 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2242 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2243 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2244 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2245 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2246 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2247 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand register-to-register vector moves: all twelve opcodes share the
// handling below.
2248 case X86::VMOVAPDrr:
2249 case X86::VMOVAPDYrr:
2250 case X86::VMOVAPSrr:
2251 case X86::VMOVAPSYrr:
2252 case X86::VMOVDQArr:
2253 case X86::VMOVDQAYrr:
2254 case X86::VMOVDQUrr:
2255 case X86::VMOVDQUYrr:
2256 case X86::VMOVUPDrr:
2257 case X86::VMOVUPDYrr:
2258 case X86::VMOVUPSrr:
2259 case X86::VMOVUPSYrr: {
// Guard: operand 0 is an extended register, or operand 1 is not one.
// NOTE(review): the guarded statement is missing from this listing
// (presumably `return false`, leaving the instruction untouched -- confirm).
2260 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2261 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart; any other opcode reaching this
// inner switch hits llvm_unreachable.
2265 switch (Inst.getOpcode()) {
2266 default: llvm_unreachable("Invalid opcode");
2267 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2268 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2269 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2270 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2271 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2272 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2273 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2274 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2275 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2276 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2277 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2278 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2280 Inst.setOpcode(NewOpc);
// Scalar moves: same idea, but the second register checked is operand 2.
// NOTE(review): the inner switch also handles X86::VMOVSDrr, so a
// `case X86::VMOVSDrr:` label presumably sits on the missing line just above
// this one -- confirm.
2284 case X86::VMOVSSrr: {
2285 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2286 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2289 switch (Inst.getOpcode()) {
2290 default: llvm_unreachable("Invalid opcode");
2291 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2292 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2294 Inst.setOpcode(NewOpc);
// Forward declaration; used below to turn a single feature bit into text for
// "instruction requires:" diagnostics. The definition is not in this chunk.
// NOTE(review): presumably it comes from X86GenAsmMatcher.inc, which is
// included at the end of the file after GET_SUBTARGET_FEATURE_NAME is
// defined -- confirm.
2300 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits \p Inst by forwarding it, \p Operands and getContext() to the
/// Instrumentation member's InstrumentAndEmitInstruction.
///
/// NOTE(review): the rest of the parameter list and of the call's argument
/// list are on lines missing from this listing; callers in this file pass an
/// MCStreamer as the third argument.
2302 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2304 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Matches the parsed \p Operands (operand 0 must be the mnemonic token)
/// against the instruction matcher and, unless \p MatchingInlineAsm is set,
/// emits the resulting instruction to \p Out.
///
/// Phases visible in this listing:
///  1. Waiting x87 aliases (finit, fsave, fstcw, ...): a WAIT instruction is
///     built and emitted, then the mnemonic token is replaced by the fn* form.
///  2. A direct MatchInstructionImpl attempt. On success the instruction is
///     post-processed with processInstruction, emitted, and \p Opcode is set.
///  3. Otherwise the mnemonic is retried once per candidate size suffix; a
///     single successful match is emitted and \p Opcode is set.
///  4. Otherwise a diagnostic is produced via Error(): ambiguous suffix,
///     invalid mnemonic, invalid operand, missing feature, or a generic
///     message.
///
/// \param IDLoc      Location used for most diagnostics and for the
///                   replacement mnemonic token.
/// \param Opcode     Set to the matched instruction's opcode on success.
/// \param ErrorInfo  Filled by the direct match; read as a feature bitmask for
///                   Match_MissingFeature and as an operand index in the
///                   invalid-operand path.
///
/// NOTE(review): many lines are missing from this listing (gaps in the
/// embedded numbering), including the declarations of Inst, Match, Mask and
/// MatchChars, several returns, `break`s and case labels. The return-value
/// convention is therefore not fully visible; presumably Error() yields true
/// and the success paths return false -- confirm against the full file.
2308 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2309 OperandVector &Operands,
2310 MCStreamer &Out, uint64_t &ErrorInfo,
2311 bool MatchingInlineAsm) {
2312 assert(!Operands.empty() && "Unexpect empty operand list!");
2313 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2314 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2315 ArrayRef<SMRange> EmptyRanges = None;
2317 // First, handle aliases that expand to multiple instructions.
2318 // FIXME: This should be replaced with a real .td file alias mechanism.
2319 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Map each waiting x87 mnemonic to its non-waiting (fn*) replacement.
2321 const char *Repl = StringSwitch<const char *>(Op.getToken())
2322 .Case("finit", "fninit")
2323 .Case("fsave", "fnsave")
2324 .Case("fstcw", "fnstcw")
2325 .Case("fstcww", "fnstcw")
2326 .Case("fstenv", "fnstenv")
2327 .Case("fstsw", "fnstsw")
2328 .Case("fstsww", "fnstsw")
2329 .Case("fclex", "fnclex")
// NOTE(review): the end of the StringSwitch and the test on Repl are missing
// from this listing. The visible lines build a WAIT instruction, emit it
// (skipped when matching inline asm), and swap the mnemonic token for Repl.
2333 Inst.setOpcode(X86::WAIT);
2335 if (!MatchingInlineAsm)
2336 EmitInstruction(Inst, Operands, Out);
2337 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers that the direct match failed with Match_InvalidOperand, so the
// diagnostics below can report an operand error rather than a bad mnemonic.
2340 bool WasOriginallyInvalidOperand = false;
2343 // First, try a direct match.
2344 switch (MatchInstructionImpl(Operands, Inst,
2345 ErrorInfo, MatchingInlineAsm,
2346 isParsingIntelSyntax())) {
2349 // Some instructions need post-processing to, for example, tweak which
2350 // encoding is selected. Loop on it while changes happen so the
2351 // individual transformations can chain off each other.
2352 if (!MatchingInlineAsm)
2353 while (processInstruction(Inst, Operands))
2357 if (!MatchingInlineAsm)
2358 EmitInstruction(Inst, Operands, Out);
2359 Opcode = Inst.getOpcode();
2361 case Match_MissingFeature: {
2362 assert(ErrorInfo && "Unknown missing feature!");
2363 // Special case the error message for the very common case where only
2364 // a single subtarget feature is missing.
2365 std::string Msg = "instruction requires:";
// Appends the name of every feature bit set in ErrorInfo. The loop runs
// sizeof(ErrorInfo)*8-1 times. NOTE(review): Mask's declaration and update are
// on lines missing from this listing; presumably it starts at 1 and shifts
// left once per iteration -- confirm, including that its type is 64 bits wide.
2367 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2368 if (ErrorInfo & Mask) {
2370 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2374 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2376 case Match_InvalidOperand:
2377 WasOriginallyInvalidOperand = true;
2379 case Match_MnemonicFail:
2383 // FIXME: Ideally, we would only attempt suffix matches for things which are
2384 // valid prefixes, and we could just infer the right unambiguous
2385 // type. However, that requires substantially more matcher support than the
2388 // Change the operand to point to a temporary token.
2389 StringRef Base = Op.getToken();
2390 SmallString<16> Tmp;
// NOTE(review): the lines that fill Tmp are missing from this listing;
// presumably Tmp holds Base plus one placeholder character that the suffix
// loop below overwrites through Tmp.back() -- confirm.
2393 Op.setTokenValue(Tmp.str());
2395 // If this instruction starts with an 'f', then it is a floating point stack
2396 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2397 // 80-bit floating point, which use the suffixes s,l,t respectively.
2399 // Otherwise, we assume that this may be an integer instruction, which comes
2400 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2401 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2403 // Check for the various suffix matches.
2404 uint64_t ErrorInfoIgnore;
2405 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try each candidate suffix in turn, recording the match result per suffix.
// NOTE(review): Match is declared on a line missing from this listing;
// the `== 4` comparison further down suggests it has four entries -- confirm.
2408 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2409 Tmp.back() = Suffixes[I];
2410 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2411 MatchingInlineAsm, isParsingIntelSyntax());
2412 // If this returned as a missing feature failure, remember that.
2413 if (Match[I] == Match_MissingFeature)
2414 ErrorInfoMissingFeature = ErrorInfoIgnore;
2417 // Restore the old token.
2418 Op.setTokenValue(Base);
2420 // If exactly one matched, then we treat that as a successful match (and the
2421 // instruction will already have been filled in correctly, since the failing
2422 // matches won't have modified it).
2423 unsigned NumSuccessfulMatches =
2424 std::count(std::begin(Match), std::end(Match), Match_Success);
2425 if (NumSuccessfulMatches == 1) {
2427 if (!MatchingInlineAsm)
2428 EmitInstruction(Inst, Operands, Out);
2429 Opcode = Inst.getOpcode();
2433 // Otherwise, the match failed, try to produce a decent error message.
2435 // If we had multiple suffix matches, then identify this as an ambiguous
2437 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so the message can list them.
2439 unsigned NumMatches = 0;
2440 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2441 if (Match[I] == Match_Success)
2442 MatchChars[NumMatches++] = Suffixes[I];
2444 SmallString<126> Msg;
2445 raw_svector_ostream OS(Msg);
2446 OS << "ambiguous instructions require an explicit suffix (could be ";
2447 for (unsigned i = 0; i != NumMatches; ++i) {
2450 if (i + 1 == NumMatches)
2452 OS << "'" << Base << MatchChars[i] << "'";
2455 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2459 // Okay, we know that none of the variants matched successfully.
2461 // If all of the instructions reported an invalid mnemonic, then the original
2462 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals array_lengthof(Match) --
// confirm.
2463 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2464 if (!WasOriginallyInvalidOperand) {
2465 ArrayRef<SMRange> Ranges =
2466 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2467 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2468 Ranges, MatchingInlineAsm);
2471 // Recover location info for the operand if we know which was the problem.
// ErrorInfo is used as an operand index here; ~0ULL means "no operand known".
2472 if (ErrorInfo != ~0ULL) {
2473 if (ErrorInfo >= Operands.size())
2474 return Error(IDLoc, "too few operands for instruction",
2475 EmptyRanges, MatchingInlineAsm);
2477 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2478 if (Operand.getStartLoc().isValid()) {
2479 SMRange OperandRange = Operand.getLocRange();
2480 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2481 OperandRange, MatchingInlineAsm);
2485 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2489 // If one instruction matched with a missing feature, report this as a
2491 if (std::count(std::begin(Match), std::end(Match),
2492 Match_MissingFeature) == 1) {
2493 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match Match_MissingFeature case, applied to
// the feature mask saved from the suffix attempts.
2495 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2496 if (ErrorInfoMissingFeature & Mask) {
2498 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2502 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2505 // If one instruction matched with an invalid operand, report this as an
2507 if (std::count(std::begin(Match), std::end(Match),
2508 Match_InvalidOperand) == 1) {
2509 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2514 // If all of these were an outright failure, report it in a useless way.
2515 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2516 EmptyRanges, MatchingInlineAsm);
/// Returns true when \p RegNo belongs to the X86 SEGMENT_REG register class,
/// i.e. when the register should be left out of clobber lists.
2520 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2521 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Dispatches the target-specific directives visible here:
///  - ".word" is parsed as a 2-byte value list via ParseDirectiveWord.
///  - Anything starting with ".code" goes to ParseDirectiveCode.
///  - ".att_syntax..." selects assembler dialect 0 and rejects the
///    "noprefix" argument with an error.
///  - ".intel_syntax..." selects assembler dialect 1 and rejects the
///    "prefix" argument with an error.
///
/// NOTE(review): the bodies for the accepted "prefix"/"noprefix" arguments,
/// the returns of the syntax branches, and the fall-through return for
/// unrecognized directives are on lines missing from this listing -- confirm
/// against the full file.
2524 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2525 StringRef IDVal = DirectiveID.getIdentifier();
2526 if (IDVal == ".word")
2527 return ParseDirectiveWord(2, DirectiveID.getLoc());
2528 else if (IDVal.startswith(".code"))
2529 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2530 else if (IDVal.startswith(".att_syntax")) {
// An optional argument may follow the directive on the same statement.
2531 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2532 if (Parser.getTok().getString() == "prefix")
2534 else if (Parser.getTok().getString() == "noprefix")
2535 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2536 "supported: registers must have a "
2537 "'%' prefix in .att_syntax");
2539 getParser().setAssemblerDialect(0);
2541 } else if (IDVal.startswith(".intel_syntax")) {
// Unlike the .att_syntax branch, the dialect is switched before the optional
// argument is examined.
2542 getParser().setAssemblerDialect(1);
2543 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2544 if (Parser.getTok().getString() == "noprefix")
2546 else if (Parser.getTok().getString() == "prefix")
2547 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2548 "supported: registers must not have "
2549 "a '%' prefix in .intel_syntax");
// Parses the operands of a ".word"-style directive: each expression is parsed
// with parseExpression and emitted to the streamer as a value of `Size` bytes.
// After an expression, the next token must be either the end of the statement
// or a comma; anything else reports "unexpected token in directive" at `L`.
//
// NOTE(review): the loop header, the returns and the token-consuming calls
// are on lines missing from this listing -- confirm against the full file.
2556 /// ParseDirectiveWord
2557 /// ::= .word [ expression (, expression)* ]
2558 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2559 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2561 const MCExpr *Value;
2562 if (getParser().parseExpression(Value))
2565 getParser().getStreamer().EmitValue(Value, Size);
2567 if (getLexer().is(AsmToken::EndOfStatement))
2570 // FIXME: Improve diagnostic.
2571 if (getLexer().isNot(AsmToken::Comma)) {
2572 Error(L, "unexpected token in directive");
// Handles the mode-switch directives. For each of ".code16", ".code32" and
// ".code64", if the parser is not already in that mode it calls SwitchMode
// and emits the matching MCAF_Code* assembler flag to the streamer. The
// visible Error call reports "unknown directive " followed by IDVal at `L`.
//
// NOTE(review): the lines between the three mode checks and that Error call,
// as well as all returns, are missing from this listing; presumably the error
// is the final else branch for any other ".code*" spelling -- confirm.
2583 /// ParseDirectiveCode
2584 /// ::= .code16 | .code32 | .code64
2585 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2586 if (IDVal == ".code16") {
2588 if (!is16BitMode()) {
2589 SwitchMode(X86::Mode16Bit);
2590 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2592 } else if (IDVal == ".code32") {
2594 if (!is32BitMode()) {
2595 SwitchMode(X86::Mode32Bit);
2596 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2598 } else if (IDVal == ".code64") {
2600 if (!is64BitMode()) {
2601 SwitchMode(X86::Mode64Bit);
2602 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2605 Error(L, "unknown directive " + IDVal);
// C-linkage entry point that constructs RegisterMCAsmParser<X86AsmParser>
// objects for TheX86_32Target and TheX86_64Target, so the same parser class
// serves both targets.
2612 // Force static initialization.
2613 extern "C" void LLVMInitializeX86AsmParser() {
2614 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2615 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2618 #define GET_REGISTER_MATCHER
2619 #define GET_MATCHER_IMPLEMENTATION
2620 #define GET_SUBTARGET_FEATURE_NAME
2621 #include "X86GenAsmMatcher.inc"