From c918d6043b823173b106c163038b14c1e2f92765 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 4 May 2010 16:12:42 +0000 Subject: [PATCH] MC/X86: Add "support" for matching ATT style mnemonic prefixes. - The idea is that when a match fails, we just try to match each of +'b', +'w', +'l'. If exactly one matches, we assume this is a mnemonic prefix and accept it. If all match, we assume it is width generic, and take the 'l' form. - This would be a horrible hack, if it weren't so simple. Therefore it is an elegant solution! Chris gets the credit for this particular elegant solution. :) - Next step to making this more robust is to have the X86 matcher generate the mnemonic prefix information. Ideally we would also compute up-front exactly which mnemonic to attempt to match, but this may require more custom code in the matcher than is really worth it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 65 ++++++++++++++++++- lib/Target/X86/X86.td | 5 +- .../MC/AsmParser/X86/x86_64-suffix-matching.s | 8 +++ 3 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 test/MC/AsmParser/X86/x86_64-suffix-matching.s diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index da013505dab..aaa1c0620ba 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -51,11 +51,14 @@ private: void InstructionCleanup(MCInst &Inst); /// @name Auto-generated Match Functions - /// { + /// { bool MatchInstruction(const SmallVectorImpl &Operands, MCInst &Inst); + bool MatchInstructionImpl( + const SmallVectorImpl &Operands, MCInst &Inst); + /// } public: @@ -132,7 +135,7 @@ struct X86Operand : public MCParsedAsmOperand { X86Operand(KindTy K, SMLoc Start, SMLoc End) : Kind(K), StartLoc(Start), EndLoc(End) {} - + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -142,6 +145,11 @@ struct X86Operand : public MCParsedAsmOperand { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } + void setTokenValue(StringRef Value) { + assert(Kind == Token && "Invalid access!"); + Tok.Data = Value.data(); + Tok.Length = Value.size(); + } unsigned getReg() const { assert(Kind == Register && "Invalid access!"); @@ -632,6 +640,59 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { } } +bool +X86ATTAsmParser::MatchInstruction(const SmallVectorImpl + &Operands, + MCInst &Inst) { + // First, try a direct match. + if (!MatchInstructionImpl(Operands, Inst)) + return false; + + // Ignore anything which is obviously not a suffix match. + if (Operands.size() == 0) + return true; + X86Operand *Op = static_cast(Operands[0]); + if (!Op->isToken() || Op->getToken().size() > 15) + return true; + + // FIXME: Ideally, we would only attempt suffix matches for things which are + // valid prefixes, and we could just infer the right unambiguous + // type. However, that requires substantially more matcher support than the + // following hack. + + // Change the operand to point to a temporary token. + char Tmp[16]; + StringRef Base = Op->getToken(); + memcpy(Tmp, Base.data(), Base.size()); + Op->setTokenValue(StringRef(Tmp, Base.size() + 1)); + + // Check for the various suffix matches. + Tmp[Base.size()] = 'b'; + bool MatchB = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'w'; + bool MatchW = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'l'; + bool MatchL = MatchInstructionImpl(Operands, Inst); + + // Restore the old token. + Op->setTokenValue(Base); + + // If exactly one matched, then we treat that as a successful match (and the + // instruction will already have been filled in correctly, since the failing + // matches won't have modified it). + if (MatchB + MatchW + MatchL == 2) + return false; + + // Similarly, if all three matched then we assume this is a generic operation + // involving memory, and take the 'l' form (to match 'gas'). + if (MatchB + MatchW + MatchL == 0) + return false; + + // Otherwise, the match failed. + return true; +} + + extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index ec86fc248e3..a53f973c1c4 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -179,8 +179,9 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { - string AsmParserClassName = "ATTAsmParser"; - string AsmParserInstCleanup = "InstructionCleanup"; + string AsmParserClassName = "ATTAsmParser"; + string AsmParserInstCleanup = "InstructionCleanup"; + string MatchInstructionName = "MatchInstructionImpl"; int Variant = 0; // Discard comments in assembly strings. diff --git a/test/MC/AsmParser/X86/x86_64-suffix-matching.s b/test/MC/AsmParser/X86/x86_64-suffix-matching.s new file mode 100644 index 00000000000..9a38e1bb06e --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-suffix-matching.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple x86_64 -o - %s | FileCheck %s + +// CHECK: addl $0, %eax + add $0, %eax +// CHECK: addb $255, %al + add $0xFF, %al +// CHECK: addl $0, (%rax) + add $0, 0(%rax) -- 2.34.1