#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class X86ATTAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
protected:
unsigned Is64Bit : 1;
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- void InstructionCleanup(MCInst &Inst);
-
- /// @name Auto-generated Match Functions
- /// {
-
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst);
- bool MatchInstructionImpl(
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
-
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
+
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+ }
- virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = true;
}
};
} // end anonymous namespace
/// @name Auto-generated Match Functions
-/// {
+/// {
static unsigned MatchRegisterName(StringRef Name);
} Kind;
SMLoc StartLoc, EndLoc;
-
+
union {
struct {
const char *Data;
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ virtual void dump(raw_ostream &OS) const {}
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
bool isToken() const {return Kind == Token; }
bool isImm() const { return Kind == Immediate; }
-
+
bool isImmSExti16i8() const {
if (!isImm())
return false;
!getMemIndexReg() && getMemScale() == 1;
}
- bool isNoSegMem() const {
- return Kind == Memory && !getMemSegReg();
- }
-
bool isReg() const { return Kind == Register; }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
- void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
- assert((N == 4) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateImm(getMemScale()));
- Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- addExpr(Inst, getMemDisp());
- }
-
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
X86Operand *Res = new X86Operand(Token, Loc, Loc);
Res->Tok.Data = Str.data();
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
+
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
- // Parse %st(1) and "%st" as "%st(0)"
- if (RegNo == 0 && Tok.getString() == "st") {
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !Is64Bit)
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
+ // Parse "%st" as "%st(0)", and also "%st(N)", which spans multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
-
+
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
-
+
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
-
+
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat ')'
return false;
}
-
+
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
-
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
-
+
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
-
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
} else {
// so we have to eat the ( to see beyond it.
SMLoc LParenLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat the '('.
-
+
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
// Nothing to do here, fall into the code below with the '(' part of the
// memory operand consumed.
} else {
SMLoc ExprEnd;
-
+
// It must be an parenthesized expression, parse it now.
if (getParser().ParseParenExpression(Disp, ExprEnd))
return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
}
}
-
+
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
-
+
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
}
-
+
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// correctly.
//
// Not that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't.
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(IndexReg, L, L)) return 0;
-
+
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
int64_t ScaleVal;
if (getParser().ParseAbsoluteExpression(ScaleVal))
return 0;
-
+
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
}
}
} else if (getLexer().isNot(AsmToken::RParen)) {
- // Otherwise we have the unsupported form of a scale amount without an
+ // A scale amount is ignored when it is given without an
// index.
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
if (getParser().ParseAbsoluteExpression(Value))
return 0;
-
- Error(Loc, "cannot have scale factor without index register");
- return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
}
}
-
+
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
}
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
-
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
bool X86ATTAsmParser::
-ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // The various flavors of pushf and popf use Requires<In32BitMode> and
- // Requires<In64BitMode>, but the assembler doesn't yet implement that.
- // For now, just do a manual check to prevent silent misencoding.
- if (Is64Bit) {
- if (Name == "popfl")
- return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
- else if (Name == "pushfl")
- return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
- } else {
- if (Name == "popfq")
- return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
- else if (Name == "pushfq")
- return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
- }
-
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
.Case("repe", "rep")
.Case("repz", "rep")
.Case("repnz", "repne")
+ .Case("iret", "iretl")
+ .Case("sysret", "sysretl")
+ .Case("push", Is64Bit ? "pushq" : "pushl")
+ .Case("pop", Is64Bit ? "popq" : "popl")
.Case("pushf", Is64Bit ? "pushfq" : "pushfl")
.Case("popf", Is64Bit ? "popfq" : "popfl")
+ .Case("pushfd", "pushfl")
+ .Case("popfd", "popfl")
.Case("retl", Is64Bit ? "retl" : "ret")
.Case("retq", Is64Bit ? "ret" : "retq")
- .Case("setz", "sete")
- .Case("setnz", "setne")
- .Case("jz", "je")
- .Case("jnz", "jne")
- .Case("cmovcl", "cmovbl")
- .Case("cmovcl", "cmovbl")
- .Case("cmovnal", "cmovbel")
- .Case("cmovnbl", "cmovael")
- .Case("cmovnbel", "cmoval")
- .Case("cmovncl", "cmovael")
- .Case("cmovngl", "cmovlel")
- .Case("cmovnl", "cmovgel")
- .Case("cmovngl", "cmovlel")
- .Case("cmovngel", "cmovll")
- .Case("cmovnll", "cmovgel")
- .Case("cmovnlel", "cmovgl")
- .Case("cmovnzl", "cmovnel")
- .Case("cmovzl", "cmovel")
+ .Case("setz", "sete") .Case("setnz", "setne")
+ .Case("setc", "setb") .Case("setna", "setbe")
+ .Case("setnae", "setb").Case("setnb", "setae")
+ .Case("setnbe", "seta").Case("setnc", "setae")
+ .Case("setng", "setle").Case("setnge", "setl")
+ .Case("setnl", "setge").Case("setnle", "setg")
+ .Case("setpe", "setp") .Case("setpo", "setnp")
+ .Case("jz", "je") .Case("jnz", "jne")
+ .Case("jc", "jb") .Case("jna", "jbe")
+ .Case("jnae", "jb").Case("jnb", "jae")
+ .Case("jnbe", "ja").Case("jnc", "jae")
+ .Case("jng", "jle").Case("jnge", "jl")
+ .Case("jnl", "jge").Case("jnle", "jg")
+ .Case("jpe", "jp") .Case("jpo", "jnp")
+ // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
+ .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl")
+ .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb")
+ .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl")
+ .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb")
+ .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
+ .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe")
+ .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
+ .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae")
+ .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
+ .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
+ .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
+ .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae")
+ .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
+ .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle")
+ .Case("cmovnw", "cmovgew").Case("cmovnl", "cmovgel")
+ .Case("cmovnq", "cmovgeq").Case("cmovn", "cmovge")
+ .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
+ .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle")
+ .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
+ .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
+ .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
+ .Case("cmovnlq", "cmovgeq").Case("cmovnl", "cmovge")
+ .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
+ .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
+ .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
+ .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne")
+ .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel")
+ .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove")
+ .Case("fwait", "wait")
+ .Case("movzx", "movzb") // FIXME: Not correct.
+ .Case("fildq", "fildll")
.Default(Name);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
- if (PatchedName.startswith("cmp") &&
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(3, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
.Default(~0U);
if (SSEComparisonCode != ~0U) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
- PatchedName = "cmpss";
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
} else if (PatchedName.endswith("sd")) {
- PatchedName = "cmpsd";
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
} else if (PatchedName.endswith("ps")) {
- PatchedName = "cmpps";
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
} else {
assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
- PatchedName = "cmppd";
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
}
}
}
+
+ // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
+ if (PatchedName.startswith("vpclmul")) {
+ unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
+ PatchedName.slice(7, PatchedName.size() - 2))
+ .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
+ .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
+ .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
+ .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
+ .Default(~0U);
+ if (CLMULQuadWordSelect != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
+ getParser().getContext());
+ assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
+ PatchedName = "vpclmulqdq";
+ }
+ }
+
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ PatchedName == "lock" || PatchedName == "rep" ||
+ PatchedName == "repne";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
// Read the first operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return TokError("unexpected token in argument list");
}
}
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
- // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
+ // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
if ((Name.startswith("shr") || Name.startswith("sar") ||
Name.startswith("shl")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+
+ // FIXME: Hack to recognize "rc[lr] <op>" -> "rcl $1, <op>".
+ if ((Name.startswith("rcl") || Name.startswith("rcr")) &&
+ Operands.size() == 2) {
+ const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(One, NameLoc, NameLoc));
+ std::swap(Operands[1], Operands[2]);
+ }
+
+ // FIXME: Hack to recognize "sh[lr]d op,op" -> "shld $1, op,op".
+ if ((Name.startswith("shld") || Name.startswith("shrd")) &&
+ Operands.size() == 3) {
+ const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
+ Operands.insert(Operands.begin()+1,
+ X86Operand::CreateImm(One, NameLoc, NameLoc));
+ }
+
+
+ // FIXME: Hack to recognize "in[bwl] <op>". Canonicalize it to
+ // "inb <op>, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl") &&
+ Operands.size() == 2) {
+ unsigned Reg;
+ if (Name[2] == 'b')
+ Reg = MatchRegisterName("al");
+ else if (Name[2] == 'w')
+ Reg = MatchRegisterName("ax");
+ else
+ Reg = MatchRegisterName("eax");
+ SMLoc Loc = Operands.back()->getEndLoc();
+ Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
+ }
+
+ // FIXME: Hack to recognize "out[bwl] <op>". Canonicalize it to
+ // "outb %al, <op>".
+ if ((Name == "outb" || Name == "outw" || Name == "outl") &&
+ Operands.size() == 2) {
+ unsigned Reg;
+ if (Name[3] == 'b')
+ Reg = MatchRegisterName("al");
+ else if (Name[3] == 'w')
+ Reg = MatchRegisterName("ax");
+ else
+ Reg = MatchRegisterName("eax");
+ SMLoc Loc = Operands.back()->getEndLoc();
+ Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
+ std::swap(Operands[1], Operands[2]);
+ }
+
+ // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+
+ // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
+ // "f{mul*,add*,sub*,div*} $op"
+ if ((Name.startswith("fmul") || Name.startswith("fadd") ||
+ Name.startswith("fsub") || Name.startswith("fdiv")) &&
Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[2])->isReg() &&
+ static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
+ delete Operands[2];
+ Operands.erase(Operands.begin() + 2);
+ }
+
+ // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
+ // B".
+ if (Name.startswith("imul") && Operands.size() == 3 &&
static_cast<X86Operand*>(Operands[1])->isImm() &&
- isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
- cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands.back())->isReg()) {
+ X86Operand *Op = static_cast<X86Operand*>(Operands.back());
+ Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
+ Op->getEndLoc()));
}
+
+ // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+ // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+ // errors, since its encoding is the most compact.
+ if (Name == "sldt" && Operands.size() == 2 &&
+ static_cast<X86Operand*>(Operands[1])->isMem()) {
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
+ }
+
+ // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
+ // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the
+ // other operand order, swap them.
+ if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
+ Name == "xchg")
+ if (Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isMem() &&
+ static_cast<X86Operand*>(Operands[2])->isReg()) {
+ std::swap(Operands[1], Operands[2]);
+ }
+
+ // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
+ // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the
+ // other operand order, swap them.
+ if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
+ Name == "test")
+ if (Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isReg() &&
+ static_cast<X86Operand*>(Operands[2])->isMem()) {
+ std::swap(Operands[1], Operands[2]);
+ }
+
+ // The assembler accepts these instructions with no operand as a synonym for
+ // an instruction acting on st(1). e.g. "fxch" -> "fxch %st(1)".
+ if ((Name == "fxch" || Name == "fucom" || Name == "fucomp" ||
+ Name == "faddp" || Name == "fsubp" || Name == "fsubrp" ||
+ Name == "fmulp" || Name == "fdivp" || Name == "fdivrp") &&
+ Operands.size() == 1) {
+ Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
+ NameLoc, NameLoc));
+ }
+
+ // The assembler accepts these instructions with too few operands as a synonym
+ // for taking %st(1),%st(0) or X, %st(0).
+ if ((Name == "fcomi" || Name == "fucomi") && Operands.size() < 3) {
+ if (Operands.size() == 1)
+ Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
+ NameLoc, NameLoc));
+ Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"),
+ NameLoc, NameLoc));
+ }
+
+ // The assembler accepts various amounts of brokenness for fnstsw.
+ if (Name == "fnstsw") {
+ if (Operands.size() == 2 &&
+ static_cast<X86Operand*>(Operands[1])->isReg()) {
+ // "fnstsw al" and "fnstsw eax" -> "fnstsw"
+ unsigned Reg = static_cast<X86Operand*>(Operands[1])->Reg.RegNo;
+ if (Reg == MatchRegisterName("eax") ||
+ Reg == MatchRegisterName("al")) {
+ delete Operands[1];
+ Operands.pop_back();
+ }
+ }
+ // "fnstsw" -> "fnstsw %ax"
+ if (Operands.size() == 1)
+ Operands.push_back(X86Operand::CreateReg(MatchRegisterName("ax"),
+ NameLoc, NameLoc));
+ }
+
+ // jmp $42,$5 -> ljmp, similarly for call.
+ if ((Name.startswith("call") || Name.startswith("jmp")) &&
+ Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isImm() &&
+ static_cast<X86Operand*>(Operands[2])->isImm()) {
+ const char *NewOpName = StringSwitch<const char *>(Name)
+ .Case("jmp", "ljmp")
+ .Case("jmpw", "ljmpw")
+ .Case("jmpl", "ljmpl")
+ .Case("jmpq", "ljmpq")
+ .Case("call", "lcall")
+ .Case("callw", "lcallw")
+ .Case("calll", "lcalll")
+ .Case("callq", "lcallq")
+ .Default(0);
+ if (NewOpName) {
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(NewOpName, NameLoc);
+ Name = NewOpName;
+ }
+ }
+
+ // lcall and ljmp -> lcalll and ljmpl
+ if ((Name == "lcall" || Name == "ljmp") && Operands.size() == 3) {
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Name == "lcall" ? "lcalll" : "ljmpl",
+ NameLoc);
+ }
+
+ // movsd -> movsl (when no operands are specified).
+ if (Name == "movsd" && Operands.size() == 1) {
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken("movsl", NameLoc);
+ }
+
+ // fstp <mem> -> fstps <mem>. Without this, we'll default to fstpl due to
+ // suffix searching.
+ if (Name == "fstp" && Operands.size() == 2 &&
+ static_cast<X86Operand*>(Operands[1])->isMem()) {
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken("fstps", NameLoc);
+ }
+
return false;
}
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
return false;
}
-/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
-/// imm operand, to having "rm" or "mr" operands with the offset in the disp
-/// field.
-static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
- bool isMR) {
- MCOperand Disp = Inst.getOperand(0);
-
- // Start over with an empty instruction.
- Inst = MCInst();
- Inst.setOpcode(Opc);
-
- if (!isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-
- // Add the mem operand.
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment
- Inst.addOperand(MCOperand::CreateImm(1)); // Scale
- Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
- Inst.addOperand(Disp); // Displacement
- Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
-
- if (isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-}
-// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
-// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
-// proper mechanism for supporting (ambiguous) feature dependent instructions.
-void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
- if (!Is64Bit) return;
-
- switch (Inst.getOpcode()) {
- case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
- case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
- case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
- case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
- case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
- case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
- case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
- case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
-
- // moffset instructions are x86-32 only.
- case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
- case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
- case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
- case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
- case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
- case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
- }
-}
+bool X86ATTAsmParser::
+MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
-bool
-X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
- &Operands,
- MCInst &Inst) {
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+
// First, try a direct match.
- if (!MatchInstructionImpl(Operands, Inst))
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ case Match_Success:
return false;
-
- // Ignore anything which is obviously not a suffix match.
- if (Operands.size() == 0)
- return true;
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- if (!Op->isToken() || Op->getToken().size() > 15)
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
return true;
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
// Change the operand to point to a temporary token.
- char Tmp[16];
StringRef Base = Op->getToken();
- memcpy(Tmp, Base.data(), Base.size());
- Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
// Check for the various suffix matches.
Tmp[Base.size()] = 'b';
- bool MatchB = MatchInstructionImpl(Operands, Inst);
+ unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
+ MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
Tmp[Base.size()] = 'w';
- bool MatchW = MatchInstructionImpl(Operands, Inst);
+ MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
Tmp[Base.size()] = 'l';
- bool MatchL = MatchInstructionImpl(Operands, Inst);
+ MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
Tmp[Base.size()] = 'q';
- bool MatchQ = MatchInstructionImpl(Operands, Inst);
+ MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
// Restore the old token.
Op->setTokenValue(Base);
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL + MatchQ == 3)
+ unsigned NumSuccessfulMatches =
+ (MatchB == Match_Success) + (MatchW == Match_Success) +
+ (MatchL == Match_Success) + (MatchQ == Match_Success);
+ if (NumSuccessfulMatches == 1)
return false;
- // Otherwise, the match failed.
+ // Otherwise, the match failed, try to produce a decent error message.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (NumSuccessfulMatches > 1) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (MatchB == Match_Success)
+ MatchChars[NumMatches++] = 'b';
+ if (MatchW == Match_Success)
+ MatchChars[NumMatches++] = 'w';
+ if (MatchL == Match_Success)
+ MatchChars[NumMatches++] = 'l';
+ if (MatchQ == Match_Success)
+ MatchChars[NumMatches++] = 'q';
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ return true;
+ }
+
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
+ (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
+ (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
+ (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
return true;
}
LLVMInitializeX86AsmLexer();
}
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
#include "X86GenAsmMatcher.inc"