X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FAsmParser%2FX86AsmParser.cpp;h=4fbe7f68e6d153b846090f52f14f5a1fd274b5c2;hb=33d60d5e56bbf3e9ed02bc916735419091736ca3;hp=10b00d545979f91ff7ed01779ec0fc2f9228ed56;hpb=b8d6e98e566724f58344d275a4bd675249bb713a;p=oota-llvm.git diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 10b00d54597..4fbe7f68e6d 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -9,17 +9,22 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" +#include "X86Subtarget.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -27,45 +32,72 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; + +protected: + unsigned Is64Bit : 1; private: MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); X86Operand *ParseOperand(); - X86Operand *ParseMemOperand(); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); - /// @name Auto-generated Match Functions - /// { - - bool MatchInstruction(const SmallVectorImpl &Operands, + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl &Operands, MCInst &Inst); + /// @name Auto-generated Matcher Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "X86GenAsmMatcher.inc" + /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : TargetAsmParser(T), Parser(_Parser), TM(TM) { - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget())); + } + + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; - + +class X86_32ATTAsmParser : public X86ATTAsmParser { +public: + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { + Is64Bit = false; + } +}; + +class X86_64ATTAsmParser : public X86ATTAsmParser { +public: + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { + Is64Bit = true; + } +}; + } // end anonymous namespace /// @name Auto-generated Match Functions -/// { +/// { static unsigned MatchRegisterName(StringRef Name); @@ -84,7 +116,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; - + union { struct { const char *Data; @@ -110,16 +142,23 @@ struct X86Operand : public MCParsedAsmOperand { X86Operand(KindTy K, SMLoc Start, SMLoc End) : Kind(K), StartLoc(Start), EndLoc(End) {} - + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + virtual void dump(raw_ostream &OS) const {} + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } + void setTokenValue(StringRef Value) { + assert(Kind == Token && "Invalid access!"); + Tok.Data = Value.data(); + Tok.Length = Value.size(); + } unsigned getReg() const { assert(Kind == Register && "Invalid access!"); @@ -155,21 +194,74 @@ struct X86Operand : public MCParsedAsmOperand { bool isToken() const {return Kind == Token; } bool isImm() const { return Kind == Immediate; } - - bool isImmSExt8() const { - // Accept immediates which fit in 8 bits when sign extended, and - // non-absolute immediates. + + bool isImmSExti16i8() const { if (!isImm()) return false; - if (const MCConstantExpr *CE = dyn_cast(getImm())) { - int64_t Value = CE->getValue(); - return Value == (int64_t) (int8_t) Value; - } + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return true; - return true; + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } - + bool isImmSExti32i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i32() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000007FFFFFFFULL)|| + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isMem() const { return Kind == Memory; } bool isAbsMem() const { @@ -177,12 +269,16 @@ struct X86Operand : public MCParsedAsmOperand { !getMemIndexReg() && getMemScale() == 1; } - bool isNoSegMem() const { - return Kind == Memory && !getMemSegReg(); - } - bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -190,13 +286,7 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); - } - - void addImmSExt8Operands(MCInst &Inst, unsigned N) const { - // FIXME: Support user customization of the render method. - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -204,7 +294,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); } @@ -213,14 +303,6 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } - void addNoSegMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4) && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); - Inst.addOperand(MCOperand::CreateImm(getMemScale())); - Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); - } - static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { X86Operand *Res = new X86Operand(Token, Loc, Loc); Res->Tok.Data = Str.data(); @@ -291,7 +373,74 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + + // If the match failed, try the register name as lowercase. + if (RegNo == 0) + RegNo = MatchRegisterName(LowercaseString(Tok.getString())); + // FIXME: This should be done using Requires and + // Requires so "eiz" usage in 64-bit instructions + // can be also checked. + if (RegNo == X86::RIZ && !Is64Bit) + return Error(Tok.getLoc(), "riz register in 64-bit mode only"); + + // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. + if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { + RegNo = X86::ST0; + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat 'st' + + // Check to see if we have '(4)' after %st. + if (getLexer().isNot(AsmToken::LParen)) + return false; + // Lex the paren. + getParser().Lex(); + + const AsmToken &IntTok = Parser.getTok(); + if (IntTok.isNot(AsmToken::Integer)) + return Error(IntTok.getLoc(), "expected stack index"); + switch (IntTok.getIntVal()) { + case 0: RegNo = X86::ST0; break; + case 1: RegNo = X86::ST1; break; + case 2: RegNo = X86::ST2; break; + case 3: RegNo = X86::ST3; break; + case 4: RegNo = X86::ST4; break; + case 5: RegNo = X86::ST5; break; + case 6: RegNo = X86::ST6; break; + case 7: RegNo = X86::ST7; break; + default: return Error(IntTok.getLoc(), "invalid stack index"); + } + + if (getParser().Lex().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat ')' + return false; + } + + // If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (RegNo == 0 && Tok.getString().size() == 3 && + Tok.getString().startswith("db")) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != 0) { + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat it. + return false; + } + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -303,14 +452,26 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, X86Operand *X86ATTAsmParser::ParseOperand() { switch (getLexer().getKind()) { default: - return ParseMemOperand(); + // Parse a memory operand with no segment register. + return ParseMemOperand(0, Parser.getTok().getLoc()); case AsmToken::Percent: { - // FIXME: if a segment register, this could either be just the seg reg, or - // the start of a memory operand. + // Read the register. unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - return X86Operand::CreateReg(RegNo, Start, End); + if (RegNo == X86::EIZ || RegNo == X86::RIZ) { + Error(Start, "eiz and riz can only be used as index registers"); + return 0; + } + + // If this is a segment register followed by a ':', then this is the start + // of a memory reference, otherwise this is a normal register reference. + if (getLexer().isNot(AsmToken::Colon)) + return X86Operand::CreateReg(RegNo, Start, End); + + + getParser().Lex(); // Eat the colon. + return ParseMemOperand(RegNo, Start); } case AsmToken::Dollar: { // $42 -> immediate. @@ -324,13 +485,10 @@ X86Operand *X86ATTAsmParser::ParseOperand() { } } -/// ParseMemOperand: segment: disp(basereg, indexreg, scale) -X86Operand *X86ATTAsmParser::ParseMemOperand() { - SMLoc MemStart = Parser.getTok().getLoc(); - - // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. - unsigned SegReg = 0; - +/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix +/// has already been parsed if present. +X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is @@ -339,7 +497,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; if (getParser().ParseExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { @@ -348,7 +506,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } else { @@ -356,17 +514,17 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { // so we have to eat the ( to see beyond it. SMLoc LParenLoc = Parser.getTok().getLoc(); Parser.Lex(); // Eat the '('. - + if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { // Nothing to do here, fall into the code below with the '(' part of the // memory operand consumed. } else { SMLoc ExprEnd; - + // It must be an parenthesized expression, parse it now. if (getParser().ParseParenExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { @@ -375,21 +533,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } } - + // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - + if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(BaseReg, L, L)) return 0; + if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { + Error(L, "eiz and riz can only be used as index registers"); + return 0; + } } - + if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -398,11 +560,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { // correctly. // // Not that even though it would be completely consistent to support syntax - // like "1(%eax,,1)", the assembler doesn't. + // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(IndexReg, L, L)) return 0; - + if (getLexer().isNot(AsmToken::RParen)) { // Parse the scale amount: // ::= ',' [scale-expression] @@ -419,7 +581,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { int64_t ScaleVal; if (getParser().ParseAbsoluteExpression(ScaleVal)) return 0; - + // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); @@ -429,19 +591,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { } } } else if (getLexer().isNot(AsmToken::RParen)) { - // Otherwise we have the unsupported form of a scale amount without an + // A scale amount without an index is ignored. // index. SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; if (getParser().ParseAbsoluteExpression(Value)) return 0; - - Error(Loc, "cannot have scale factor without index register"); - return 0; + + if (Value != 1) + Warning(Loc, "scale factor without index register is ignored"); + Scale = 1; } } - + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. if (getLexer().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); @@ -449,34 +612,176 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { } SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. - + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } bool X86ATTAsmParser:: -ParseInstruction(const StringRef &Name, SMLoc NameLoc, +ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { - // FIXME: Hack to recognize "sal..." for now. We need a way to represent - // alternative syntaxes in the .td file, without requiring instruction - // duplication. - if (Name.startswith("sal")) { - std::string Tmp = "shl" + Name.substr(3).str(); - Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc)); - } else { - // FIXME: This is a hack. We eventually want to add a general pattern - // mechanism to be used in the table gen file for these assembly names that - // use the same opcodes. Also we should only allow the "alternate names" - // for rep and repne with the instructions they can only appear with. - StringRef PatchedName = Name; - if (Name == "repe" || Name == "repz") - PatchedName = "rep"; - else if (Name == "repnz") - PatchedName = "repne"; - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. + StringRef PatchedName = StringSwitch(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Case("iret", "iretl") + .Case("sysret", "sysretl") + .Case("push", Is64Bit ? "pushq" : "pushl") + .Case("pop", Is64Bit ? "popq" : "popl") + .Case("pushf", Is64Bit ? "pushfq" : "pushfl") + .Case("popf", Is64Bit ? "popfq" : "popfl") + .Case("pushfd", "pushfl") + .Case("popfd", "popfl") + .Case("retl", Is64Bit ? "retl" : "ret") + .Case("retq", Is64Bit ? "ret" : "retq") + .Case("setz", "sete") .Case("setnz", "setne") + .Case("setc", "setb") .Case("setna", "setbe") + .Case("setnae", "setb").Case("setnb", "setae") + .Case("setnbe", "seta").Case("setnc", "setae") + .Case("setng", "setle").Case("setnge", "setl") + .Case("setnl", "setge").Case("setnle", "setg") + .Case("setpe", "setp") .Case("setpo", "setnp") + .Case("jz", "je") .Case("jnz", "jne") + .Case("jc", "jb") .Case("jna", "jbe") + .Case("jnae", "jb").Case("jnb", "jae") + .Case("jnbe", "ja").Case("jnc", "jae") + .Case("jng", "jle").Case("jnge", "jl") + .Case("jnl", "jge").Case("jnle", "jg") + .Case("jpe", "jp") .Case("jpo", "jnp") + // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands. + .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl") + .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb") + .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl") + .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb") + .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel") + .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe") + .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael") + .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae") + .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval") + .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova") + .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael") + .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae") + .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") + .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") + .Case("cmovnw", "cmovgew").Case("cmovnl", "cmovgel") + .Case("cmovnq", "cmovgeq").Case("cmovn", "cmovge") + .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") + .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") + .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll") + .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl") + .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel") + .Case("cmovnlq", "cmovgeq").Case("cmovnl", "cmovge") + .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl") + .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg") + .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel") + .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne") + .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel") + .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove") + .Case("fwait", "wait") + .Case("movzx", "movzb") // FIXME: Not correct. + .Case("fildq", "fildll") + .Default(Name); + + // FIXME: Hack to recognize cmp{ss,sd,ps,pd}. + const MCExpr *ExtraImmOp = 0; + if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && + (PatchedName.endswith("ss") || PatchedName.endswith("sd") || + PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + bool IsVCMP = PatchedName.startswith("vcmp"); + unsigned SSECCIdx = IsVCMP ? 4 : 3; + unsigned SSEComparisonCode = StringSwitch( + PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Case("eq_uq", 8) + .Case("nge", 9) + .Case("ngt", 0x0A) + .Case("false", 0x0B) + .Case("neq_oq", 0x0C) + .Case("ge", 0x0D) + .Case("gt", 0x0E) + .Case("true", 0x0F) + .Case("eq_os", 0x10) + .Case("lt_oq", 0x11) + .Case("le_oq", 0x12) + .Case("unord_s", 0x13) + .Case("neq_us", 0x14) + .Case("nlt_uq", 0x15) + .Case("nle_uq", 0x16) + .Case("ord_s", 0x17) + .Case("eq_us", 0x18) + .Case("nge_uq", 0x19) + .Case("ngt_uq", 0x1A) + .Case("false_os", 0x1B) + .Case("neq_os", 0x1C) + .Case("ge_oq", 0x1D) + .Case("gt_oq", 0x1E) + .Case("true_us", 0x1F) + .Default(~0U); + if (SSEComparisonCode != ~0U) { + ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, + getParser().getContext()); + if (PatchedName.endswith("ss")) { + PatchedName = IsVCMP ? "vcmpss" : "cmpss"; + } else if (PatchedName.endswith("sd")) { + PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; + } else if (PatchedName.endswith("ps")) { + PatchedName = IsVCMP ? "vcmpps" : "cmpps"; + } else { + assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); + PatchedName = IsVCMP ? "vcmppd" : "cmppd"; + } + } } - if (getLexer().isNot(AsmToken::EndOfStatement)) { + // FIXME: Hack to recognize vpclmuldq + if (PatchedName.startswith("vpclmul")) { + unsigned CLMULQuadWordSelect = StringSwitch( + PatchedName.slice(7, PatchedName.size() - 2)) + .Case("lqlq", 0x00) // src1[63:0], src2[63:0] + .Case("hqlq", 0x01) // src1[127:64], src2[63:0] + .Case("lqhq", 0x10) // src1[63:0], src2[127:64] + .Case("hqhq", 0x11) // src1[127:64], src2[127:64] + .Default(~0U); + if (CLMULQuadWordSelect != ~0U) { + ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, + getParser().getContext()); + assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); + PatchedName = "vpclmulqdq"; + } + } + + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + + if (ExtraImmOp) + Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + + + // Determine whether this is an instruction prefix. + bool isPrefix = + PatchedName == "lock" || PatchedName == "rep" || + PatchedName == "repne"; + + + // This does the actual operand parsing. Don't parse any more if we have a + // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we + // just want to parse the "lock" as the first instruction and the "incl" as + // the next one. + if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { @@ -488,20 +793,241 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, // Read the first operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; - + } + while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. // Parse and remember the operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Parser.EatToEndOfStatement(); + return TokError("unexpected token in argument list"); + } + } + + if (getLexer().is(AsmToken::EndOfStatement)) + Parser.Lex(); // Consume the EndOfStatement + + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to + // "shift ". + if ((Name.startswith("shr") || Name.startswith("sar") || + Name.startswith("shl")) && + Operands.size() == 3) { + X86Operand *Op1 = static_cast(Operands[1]); + if (Op1->isImm() && isa(Op1->getImm()) && + cast(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); + } + } + + // FIXME: Hack to handle recognize "rc[lr] " -> "rcl $1, ". + if ((Name.startswith("rcl") || Name.startswith("rcr")) && + Operands.size() == 2) { + const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext()); + Operands.push_back(X86Operand::CreateImm(One, NameLoc, NameLoc)); + std::swap(Operands[1], Operands[2]); + } + + // FIXME: Hack to handle recognize "sh[lr]d op,op" -> "shld $1, op,op". + if ((Name.startswith("shld") || Name.startswith("shrd")) && + Operands.size() == 3) { + const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext()); + Operands.insert(Operands.begin()+1, + X86Operand::CreateImm(One, NameLoc, NameLoc)); + } + + + // FIXME: Hack to handle recognize "in[bwl] ". Canonicalize it to + // "inb , %al". + if ((Name == "inb" || Name == "inw" || Name == "inl") && + Operands.size() == 2) { + unsigned Reg; + if (Name[2] == 'b') + Reg = MatchRegisterName("al"); + else if (Name[2] == 'w') + Reg = MatchRegisterName("ax"); + else + Reg = MatchRegisterName("eax"); + SMLoc Loc = Operands.back()->getEndLoc(); + Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); + } + + // FIXME: Hack to handle recognize "out[bwl] ". Canonicalize it to + // "outb %al, ". + if ((Name == "outb" || Name == "outw" || Name == "outl") && + Operands.size() == 2) { + unsigned Reg; + if (Name[3] == 'b') + Reg = MatchRegisterName("al"); + else if (Name[3] == 'w') + Reg = MatchRegisterName("ax"); + else + Reg = MatchRegisterName("eax"); + SMLoc Loc = Operands.back()->getEndLoc(); + Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); + std::swap(Operands[1], Operands[2]); + } + + // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". + if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.back(); + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; } } + + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as + // "f{mul*,add*,sub*,div*} $op" + if ((Name.startswith("fmul") || Name.startswith("fadd") || + Name.startswith("fsub") || Name.startswith("fdiv")) && + Operands.size() == 3 && + static_cast(Operands[2])->isReg() && + static_cast(Operands[2])->getReg() == X86::ST0) { + delete Operands[2]; + Operands.erase(Operands.begin() + 2); + } + // FIXME: Hack to handle "imul , B" which is an alias for "imul , B, + // B". + if (Name.startswith("imul") && Operands.size() == 3 && + static_cast(Operands[1])->isImm() && + static_cast(Operands.back())->isReg()) { + X86Operand *Op = static_cast(Operands.back()); + Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), + Op->getEndLoc())); + } + + // 'sldt ' can be encoded with either sldtw or sldtq with the same + // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity + // errors, since its encoding is the most compact. + if (Name == "sldt" && Operands.size() == 2 && + static_cast(Operands[1])->isMem()) { + delete Operands[0]; + Operands[0] = X86Operand::CreateToken("sldtw", NameLoc); + } + + // The assembler accepts "xchgX , " and "xchgX , " as + // synonyms. Our tables only have the ", " form, so if we see the + // other operand order, swap them. + if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"|| + Name == "xchg") + if (Operands.size() == 3 && + static_cast(Operands[1])->isMem() && + static_cast(Operands[2])->isReg()) { + std::swap(Operands[1], Operands[2]); + } + + // The assembler accepts "testX , " and "testX , " as + // synonyms. Our tables only have the ", " form, so if we see the + // other operand order, swap them. + if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"|| + Name == "test") + if (Operands.size() == 3 && + static_cast(Operands[1])->isReg() && + static_cast(Operands[2])->isMem()) { + std::swap(Operands[1], Operands[2]); + } + + // The assembler accepts these instructions with no operand as a synonym for + // an instruction acting on st(1). e.g. "fxch" -> "fxch %st(1)". + if ((Name == "fxch" || Name == "fucom" || Name == "fucomp" || + Name == "faddp" || Name == "fsubp" || Name == "fsubrp" || + Name == "fmulp" || Name == "fdivp" || Name == "fdivrp") && + Operands.size() == 1) { + Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"), + NameLoc, NameLoc)); + } + + // The assembler accepts these instructions with two few operands as a synonym + // for taking %st(1),%st(0) or X, %st(0). + if ((Name == "fcomi" || Name == "fucomi") && Operands.size() < 3) { + if (Operands.size() == 1) + Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"), + NameLoc, NameLoc)); + Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"), + NameLoc, NameLoc)); + } + + // The assembler accepts various amounts of brokenness for fnstsw. + if (Name == "fnstsw") { + if (Operands.size() == 2 && + static_cast(Operands[1])->isReg()) { + // "fnstsw al" and "fnstsw eax" -> "fnstw" + unsigned Reg = static_cast(Operands[1])->Reg.RegNo; + if (Reg == MatchRegisterName("eax") || + Reg == MatchRegisterName("al")) { + delete Operands[1]; + Operands.pop_back(); + } + } + + // "fnstw" -> "fnstw %ax" + if (Operands.size() == 1) + Operands.push_back(X86Operand::CreateReg(MatchRegisterName("ax"), + NameLoc, NameLoc)); + } + + // jmp $42,$5 -> ljmp, similarly for call. + if ((Name.startswith("call") || Name.startswith("jmp")) && + Operands.size() == 3 && + static_cast(Operands[1])->isImm() && + static_cast(Operands[2])->isImm()) { + const char *NewOpName = StringSwitch(Name) + .Case("jmp", "ljmp") + .Case("jmpw", "ljmpw") + .Case("jmpl", "ljmpl") + .Case("jmpq", "ljmpq") + .Case("call", "lcall") + .Case("callw", "lcallw") + .Case("calll", "lcalll") + .Case("callq", "lcallq") + .Default(0); + if (NewOpName) { + delete Operands[0]; + Operands[0] = X86Operand::CreateToken(NewOpName, NameLoc); + Name = NewOpName; + } + } + + // lcall and ljmp -> lcalll and ljmpl + if ((Name == "lcall" || Name == "ljmp") && Operands.size() == 3) { + delete Operands[0]; + Operands[0] = X86Operand::CreateToken(Name == "lcall" ? "lcalll" : "ljmpl", + NameLoc); + } + + // movsd -> movsl (when no operands are specified). + if (Name == "movsd" && Operands.size() == 1) { + delete Operands[0]; + Operands[0] = X86Operand::CreateToken("movsl", NameLoc); + } + + // fstp -> fstps . Without this, we'll default to fstpl due to + // suffix searching. + if (Name == "fstp" && Operands.size() == 2 && + static_cast(Operands[1])->isMem()) { + delete Operands[0]; + Operands[0] = X86Operand::CreateToken("fstps", NameLoc); + } + return false; } @@ -525,7 +1051,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); @@ -537,13 +1063,155 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } + +bool X86ATTAsmParser:: +MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl &Operands, + MCInst &Inst) { + assert(!Operands.empty() && "Unexpect empty operand list!"); + + bool WasOriginallyInvalidOperand = false; + unsigned OrigErrorInfo; + + // First, try a direct match. + switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + case Match_Success: + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: + WasOriginallyInvalidOperand = true; + break; + case Match_MnemonicFail: + break; + } + + // FIXME: Ideally, we would only attempt suffix matches for things which are + // valid prefixes, and we could just infer the right unambiguous + // type. However, that requires substantially more matcher support than the + // following hack. + + X86Operand *Op = static_cast(Operands[0]); + assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + + // Change the operand to point to a temporary token. + StringRef Base = Op->getToken(); + SmallString<16> Tmp; + Tmp += Base; + Tmp += ' '; + Op->setTokenValue(Tmp.str()); + + // Check for the various suffix matches. + Tmp[Base.size()] = 'b'; + unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo; + MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo); + Tmp[Base.size()] = 'w'; + MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo); + Tmp[Base.size()] = 'l'; + MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo); + Tmp[Base.size()] = 'q'; + MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo); + + // Restore the old token. + Op->setTokenValue(Base); + + // If exactly one matched, then we treat that as a successful match (and the + // instruction will already have been filled in correctly, since the failing + // matches won't have modified it). + unsigned NumSuccessfulMatches = + (MatchB == Match_Success) + (MatchW == Match_Success) + + (MatchL == Match_Success) + (MatchQ == Match_Success); + if (NumSuccessfulMatches == 1) + return false; + + // Otherwise, the match failed, try to produce a decent error message. + + // If we had multiple suffix matches, then identify this as an ambiguous + // match. + if (NumSuccessfulMatches > 1) { + char MatchChars[4]; + unsigned NumMatches = 0; + if (MatchB == Match_Success) + MatchChars[NumMatches++] = 'b'; + if (MatchW == Match_Success) + MatchChars[NumMatches++] = 'w'; + if (MatchL == Match_Success) + MatchChars[NumMatches++] = 'l'; + if (MatchQ == Match_Success) + MatchChars[NumMatches++] = 'q'; + + SmallString<126> Msg; + raw_svector_ostream OS(Msg); + OS << "ambiguous instructions require an explicit suffix (could be "; + for (unsigned i = 0; i != NumMatches; ++i) { + if (i != 0) + OS << ", "; + if (i + 1 == NumMatches) + OS << "or "; + OS << "'" << Base << MatchChars[i] << "'"; + } + OS << ")"; + Error(IDLoc, OS.str()); + return true; + } + + // Okay, we know that none of the variants matched successfully. + + // If all of the instructions reported an invalid mnemonic, then the original + // mnemonic was invalid. + if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) && + (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) { + if (!WasOriginallyInvalidOperand) { + Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); + return true; + } + + // Recover location info for the operand if we know which was the problem. + SMLoc ErrorLoc = IDLoc; + if (OrigErrorInfo != ~0U) { + if (OrigErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + + // If one instruction matched with a missing feature, report this as a + // missing feature. + if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) + + (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){ + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // If one instruction matched with an invalid operand, report this as an + // operand failure. + if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) + + (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){ + Error(IDLoc, "invalid operand for instruction"); + return true; + } + + // If all of these were an outright failure, report it in a useless way. + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + return true; +} + + extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser X(TheX86_32Target); - RegisterAsmParser Y(TheX86_64Target); + RegisterAsmParser X(TheX86_32Target); + RegisterAsmParser Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION #include "X86GenAsmMatcher.inc"